diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
155 files changed, 9363 insertions, 1825 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 260e32ef7b..4536c8ad0e 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -80,7 +80,7 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o amdgpu_kms.o \ amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \ amdgpu_fw_attestation.o amdgpu_securedisplay.o \ amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \ - amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o + amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o @@ -98,7 +98,7 @@ amdgpu-y += \ vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o arct_reg_init.o mxgpu_nv.o \ nbio_v7_2.o hdp_v4_0.o hdp_v5_0.o aldebaran_reg_init.o aldebaran.o soc21.o \ sienna_cichlid.o smu_v13_0_10.o nbio_v4_3.o hdp_v6_0.o nbio_v7_7.o hdp_v5_2.o lsdma_v6_0.o \ - nbio_v7_9.o aqua_vanjaram.o nbio_v7_11.o + nbio_v7_9.o aqua_vanjaram.o nbio_v7_11.o lsdma_v7_0.o hdp_v7_0.o nbif_v6_3_1.o # add DF block amdgpu-y += \ @@ -132,7 +132,8 @@ amdgpu-y += \ vega20_ih.o \ navi10_ih.o \ ih_v6_0.o \ - ih_v6_1.o + ih_v6_1.o \ + ih_v7_0.o # add PSP block amdgpu-y += \ @@ -143,7 +144,8 @@ amdgpu-y += \ psp_v11_0_8.o \ psp_v12_0.o \ psp_v13_0.o \ - psp_v13_0_4.o + psp_v13_0_4.o \ + psp_v14_0.o # add DCE block amdgpu-y += \ @@ -208,6 +210,7 @@ amdgpu-y += \ vcn_v4_0.o \ vcn_v4_0_3.o \ vcn_v4_0_5.o \ + vcn_v5_0_0.o \ amdgpu_jpeg.o \ jpeg_v1_0.o \ jpeg_v2_0.o \ @@ -215,7 +218,8 @@ amdgpu-y += \ jpeg_v3_0.o \ jpeg_v4_0.o \ jpeg_v4_0_3.o \ - jpeg_v4_0_5.o + jpeg_v4_0_5.o \ + jpeg_v5_0_0.o # add VPE block amdgpu-y += \ @@ -233,7 +237,8 @@ amdgpu-y += \ athub_v1_0.o \ athub_v2_0.o \ athub_v2_1.o \ - athub_v3_0.o + athub_v3_0.o \ + athub_v4_1_0.o # add SMUIO block amdgpu-y += \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 79827a6dcd..b3b8464720 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -107,6 +107,7 @@ #include "amdgpu_smuio.h" #include "amdgpu_fdinfo.h" #include "amdgpu_mca.h" +#include "amdgpu_aca.h" #include "amdgpu_ras.h" #include "amdgpu_xcp.h" #include "amdgpu_seq64.h" @@ -114,14 +115,12 @@ #define MAX_GPU_INSTANCE 64 -struct amdgpu_gpu_instance -{ +struct amdgpu_gpu_instance { struct amdgpu_device *adev; int mgpu_fan_enabled; }; -struct amdgpu_mgpu_info -{ +struct amdgpu_mgpu_info { struct amdgpu_gpu_instance gpu_ins[MAX_GPU_INSTANCE]; struct mutex mutex; uint32_t num_gpu; @@ -140,8 +139,7 @@ enum amdgpu_ss { AMDGPU_SS_DRV_UNLOAD }; -struct amdgpu_watchdog_timer -{ +struct amdgpu_watchdog_timer { bool timeout_fatal_disable; uint32_t period; /* maxCycles = (1 << period), the number of cycles before a timeout */ }; @@ -196,9 +194,10 @@ extern int amdgpu_emu_mode; extern uint amdgpu_smu_memory_pool_size; extern int amdgpu_smu_pptable_id; extern uint amdgpu_dc_feature_mask; +extern uint amdgpu_freesync_vid_mode; extern uint amdgpu_dc_debug_mask; extern uint amdgpu_dc_visual_confirm; -extern uint amdgpu_dm_abm_level; +extern int amdgpu_dm_abm_level; extern int amdgpu_backlight; extern int amdgpu_damage_clips; extern struct amdgpu_mgpu_info mgpu_info; @@ -211,6 +210,7 @@ extern int amdgpu_async_gfx_ring; extern int amdgpu_mcbp; extern int amdgpu_discovery; extern int amdgpu_mes; +extern int amdgpu_mes_log_enable; extern int amdgpu_mes_kiq; extern int amdgpu_noretry; extern int amdgpu_force_asic_type; @@ -1046,6 +1046,9 @@ struct amdgpu_device { /* MCA */ struct amdgpu_mca mca; + /* ACA */ + struct amdgpu_aca aca; + struct amdgpu_ip_block ip_blocks[AMDGPU_MAX_IP_NUM]; uint32_t harvest_ip_mask; int num_ip_blocks; @@ -1095,6 +1098,7 @@ struct amdgpu_device { long sdma_timeout; long video_timeout; long compute_timeout; + long psp_timeout; uint64_t unique_id; uint64_t df_perfmon_config_assign_mask[AMDGPU_MAX_DF_PERFMONS]; @@ -1332,6 +1336,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev); #define WREG32_FIELD_OFFSET(reg, offset, field, val) \ WREG32(mm##reg + offset, (RREG32(mm##reg + offset) & ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field)) +#define AMDGPU_GET_REG_FIELD(x, h, l) (((x) & GENMASK_ULL(h, l)) >> (l)) /* * BIOS helpers. */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c new file mode 100644 index 0000000000..493982f946 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c @@ -0,0 +1,879 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include <linux/list.h> +#include "amdgpu.h" +#include "amdgpu_aca.h" +#include "amdgpu_ras.h" + +#define ACA_BANK_HWID(type, hwid, mcatype) [ACA_HWIP_TYPE_##type] = {hwid, mcatype} + +typedef int bank_handler_t(struct aca_handle *handle, struct aca_bank *bank, enum aca_error_type type, void *data); + +struct aca_banks { + int nr_banks; + struct list_head list; +}; + +struct aca_hwip { + int hwid; + int mcatype; +}; + +static struct aca_hwip aca_hwid_mcatypes[ACA_HWIP_TYPE_COUNT] = { + ACA_BANK_HWID(SMU, 0x01, 0x01), + ACA_BANK_HWID(PCS_XGMI, 0x50, 0x00), + ACA_BANK_HWID(UMC, 0x96, 0x00), +}; + +static void aca_banks_init(struct aca_banks *banks) +{ + if (!banks) + return; + + memset(banks, 0, sizeof(*banks)); + INIT_LIST_HEAD(&banks->list); +} + +static int aca_banks_add_bank(struct aca_banks *banks, struct aca_bank *bank) +{ + struct aca_bank_node *node; + + if (!bank) + return -EINVAL; + + node = kvzalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return -ENOMEM; + + memcpy(&node->bank, bank, sizeof(*bank)); + + INIT_LIST_HEAD(&node->node); + list_add_tail(&node->node, &banks->list); + + banks->nr_banks++; + + return 0; +} + +static void aca_banks_release(struct aca_banks *banks) +{ + struct aca_bank_node *node, *tmp; + + list_for_each_entry_safe(node, tmp, &banks->list, node) { + list_del(&node->node); + kvfree(node); + } +} + +static int aca_smu_get_valid_aca_count(struct amdgpu_device *adev, enum aca_error_type type, u32 *count) +{ + struct amdgpu_aca *aca = &adev->aca; + const struct aca_smu_funcs *smu_funcs = aca->smu_funcs; + + if (!count) + return -EINVAL; + + if (!smu_funcs || !smu_funcs->get_valid_aca_count) + return -EOPNOTSUPP; + + return smu_funcs->get_valid_aca_count(adev, type, count); +} + +static struct aca_regs_dump { + const char *name; + int reg_idx; +} aca_regs[] = { + {"CONTROL", ACA_REG_IDX_CTL}, + {"STATUS", ACA_REG_IDX_STATUS}, + {"ADDR", ACA_REG_IDX_ADDR}, + {"MISC", ACA_REG_IDX_MISC0}, + {"CONFIG", ACA_REG_IDX_CONFG}, + {"IPID", ACA_REG_IDX_IPID}, + {"SYND", ACA_REG_IDX_SYND}, + {"DESTAT", ACA_REG_IDX_DESTAT}, + {"DEADDR", ACA_REG_IDX_DEADDR}, + {"CONTROL_MASK", ACA_REG_IDX_CTL_MASK}, +}; + +static void aca_smu_bank_dump(struct amdgpu_device *adev, int idx, int total, struct aca_bank *bank) +{ + int i; + + dev_info(adev->dev, HW_ERR "Accelerator Check Architecture events logged\n"); + /* plus 1 for output format, e.g: ACA[08/08]: xxxx */ + for (i = 0; i < ARRAY_SIZE(aca_regs); i++) + dev_info(adev->dev, HW_ERR "ACA[%02d/%02d].%s=0x%016llx\n", + idx + 1, total, aca_regs[i].name, bank->regs[aca_regs[i].reg_idx]); +} + +static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_error_type type, + int start, int count, + struct aca_banks *banks) +{ + struct amdgpu_aca *aca = &adev->aca; + const struct aca_smu_funcs *smu_funcs = aca->smu_funcs; + struct aca_bank bank; + int i, max_count, ret; + + if (!count) + return 0; + + if (!smu_funcs || !smu_funcs->get_valid_aca_bank) + return -EOPNOTSUPP; + + switch (type) { + case ACA_ERROR_TYPE_UE: + max_count = smu_funcs->max_ue_bank_count; + break; + case ACA_ERROR_TYPE_CE: + max_count = smu_funcs->max_ce_bank_count; + break; + case ACA_ERROR_TYPE_DEFERRED: + default: + return -EINVAL; + } + + if (start + count >= max_count) + return -EINVAL; + + count = min_t(int, count, max_count); + for (i = 0; i < count; i++) { + memset(&bank, 0, sizeof(bank)); + ret = smu_funcs->get_valid_aca_bank(adev, type, start + i, &bank); + if (ret) + return ret; + + aca_smu_bank_dump(adev, i, count, &bank); + + ret = aca_banks_add_bank(banks, &bank); + if (ret) + return ret; + } + + return 0; +} + +static bool aca_bank_hwip_is_matched(struct aca_bank *bank, enum aca_hwip_type type) +{ + + struct aca_hwip *hwip; + int hwid, mcatype; + u64 ipid; + + if (!bank || type == ACA_HWIP_TYPE_UNKNOW) + return false; + + hwip = &aca_hwid_mcatypes[type]; + if (!hwip->hwid) + return false; + + ipid = bank->regs[ACA_REG_IDX_IPID]; + hwid = ACA_REG__IPID__HARDWAREID(ipid); + mcatype = ACA_REG__IPID__MCATYPE(ipid); + + return hwip->hwid == hwid && hwip->mcatype == mcatype; +} + +static bool aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, enum aca_error_type type) +{ + const struct aca_bank_ops *bank_ops = handle->bank_ops; + + if (!aca_bank_hwip_is_matched(bank, handle->hwip)) + return false; + + if (!bank_ops->aca_bank_is_valid) + return true; + + return bank_ops->aca_bank_is_valid(handle, bank, type, handle->data); +} + +static struct aca_bank_error *new_bank_error(struct aca_error *aerr, struct aca_bank_info *info) +{ + struct aca_bank_error *bank_error; + + bank_error = kvzalloc(sizeof(*bank_error), GFP_KERNEL); + if (!bank_error) + return NULL; + + INIT_LIST_HEAD(&bank_error->node); + memcpy(&bank_error->info, info, sizeof(*info)); + + mutex_lock(&aerr->lock); + list_add_tail(&bank_error->node, &aerr->list); + mutex_unlock(&aerr->lock); + + return bank_error; +} + +static struct aca_bank_error *find_bank_error(struct aca_error *aerr, struct aca_bank_info *info) +{ + struct aca_bank_error *bank_error = NULL; + struct aca_bank_info *tmp_info; + bool found = false; + + mutex_lock(&aerr->lock); + list_for_each_entry(bank_error, &aerr->list, node) { + tmp_info = &bank_error->info; + if (tmp_info->socket_id == info->socket_id && + tmp_info->die_id == info->die_id) { + found = true; + goto out_unlock; + } + } + +out_unlock: + mutex_unlock(&aerr->lock); + + return found ? bank_error : NULL; +} + +static void aca_bank_error_remove(struct aca_error *aerr, struct aca_bank_error *bank_error) +{ + if (!aerr || !bank_error) + return; + + list_del(&bank_error->node); + aerr->nr_errors--; + + kvfree(bank_error); +} + +static struct aca_bank_error *get_bank_error(struct aca_error *aerr, struct aca_bank_info *info) +{ + struct aca_bank_error *bank_error; + + if (!aerr || !info) + return NULL; + + bank_error = find_bank_error(aerr, info); + if (bank_error) + return bank_error; + + return new_bank_error(aerr, info); +} + +static int aca_log_errors(struct aca_handle *handle, enum aca_error_type type, + struct aca_bank_report *report) +{ + struct aca_error_cache *error_cache = &handle->error_cache; + struct aca_bank_error *bank_error; + struct aca_error *aerr; + + if (!handle || !report) + return -EINVAL; + + if (!report->count[type]) + return 0; + + aerr = &error_cache->errors[type]; + bank_error = get_bank_error(aerr, &report->info); + if (!bank_error) + return -ENOMEM; + + bank_error->count[type] += report->count[type]; + + return 0; +} + +static int aca_generate_bank_report(struct aca_handle *handle, struct aca_bank *bank, + enum aca_error_type type, struct aca_bank_report *report) +{ + const struct aca_bank_ops *bank_ops = handle->bank_ops; + + if (!bank || !report) + return -EINVAL; + + if (!bank_ops->aca_bank_generate_report) + return -EOPNOTSUPP; + + memset(report, 0, sizeof(*report)); + return bank_ops->aca_bank_generate_report(handle, bank, type, + report, handle->data); +} + +static int handler_aca_log_bank_error(struct aca_handle *handle, struct aca_bank *bank, + enum aca_error_type type, void *data) +{ + struct aca_bank_report report; + int ret; + + ret = aca_generate_bank_report(handle, bank, type, &report); + if (ret) + return ret; + + if (!report.count[type]) + return 0; + + ret = aca_log_errors(handle, type, &report); + if (ret) + return ret; + + return 0; +} + +static int aca_dispatch_bank(struct aca_handle_manager *mgr, struct aca_bank *bank, + enum aca_error_type type, bank_handler_t handler, void *data) +{ + struct aca_handle *handle; + int ret; + + if (list_empty(&mgr->list)) + return 0; + + list_for_each_entry(handle, &mgr->list, node) { + if (!aca_bank_is_valid(handle, bank, type)) + continue; + + ret = handler(handle, bank, type, data); + if (ret) + return ret; + } + + return 0; +} + +static int aca_dispatch_banks(struct aca_handle_manager *mgr, struct aca_banks *banks, + enum aca_error_type type, bank_handler_t handler, void *data) +{ + struct aca_bank_node *node; + struct aca_bank *bank; + int ret; + + if (!mgr || !banks) + return -EINVAL; + + /* pre check to avoid unnecessary operations */ + if (list_empty(&mgr->list) || list_empty(&banks->list)) + return 0; + + list_for_each_entry(node, &banks->list, node) { + bank = &node->bank; + + ret = aca_dispatch_bank(mgr, bank, type, handler, data); + if (ret) + return ret; + } + + return 0; +} + +static int aca_banks_update(struct amdgpu_device *adev, enum aca_error_type type, + bank_handler_t handler, void *data) +{ + struct amdgpu_aca *aca = &adev->aca; + struct aca_banks banks; + u32 count = 0; + int ret; + + if (list_empty(&aca->mgr.list)) + return 0; + + /* NOTE: pmfw is only support UE and CE */ + if (type == ACA_ERROR_TYPE_DEFERRED) + type = ACA_ERROR_TYPE_CE; + + ret = aca_smu_get_valid_aca_count(adev, type, &count); + if (ret) + return ret; + + if (!count) + return 0; + + aca_banks_init(&banks); + + ret = aca_smu_get_valid_aca_banks(adev, type, 0, count, &banks); + if (ret) + goto err_release_banks; + + if (list_empty(&banks.list)) { + ret = 0; + goto err_release_banks; + } + + ret = aca_dispatch_banks(&aca->mgr, &banks, type, + handler, data); + if (ret) + goto err_release_banks; + +err_release_banks: + aca_banks_release(&banks); + + return ret; +} + +static int aca_log_aca_error_data(struct aca_bank_error *bank_error, enum aca_error_type type, struct ras_err_data *err_data) +{ + struct aca_bank_info *info; + struct amdgpu_smuio_mcm_config_info mcm_info; + u64 count; + + if (type >= ACA_ERROR_TYPE_COUNT) + return -EINVAL; + + count = bank_error->count[type]; + if (!count) + return 0; + + info = &bank_error->info; + mcm_info.die_id = info->die_id; + mcm_info.socket_id = info->socket_id; + + switch (type) { + case ACA_ERROR_TYPE_UE: + amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, NULL, count); + break; + case ACA_ERROR_TYPE_CE: + amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, NULL, count); + break; + case ACA_ERROR_TYPE_DEFERRED: + default: + break; + } + + return 0; +} + +static int aca_log_aca_error(struct aca_handle *handle, enum aca_error_type type, struct ras_err_data *err_data) +{ + struct aca_error_cache *error_cache = &handle->error_cache; + struct aca_error *aerr = &error_cache->errors[type]; + struct aca_bank_error *bank_error, *tmp; + + mutex_lock(&aerr->lock); + + if (list_empty(&aerr->list)) + goto out_unlock; + + list_for_each_entry_safe(bank_error, tmp, &aerr->list, node) { + aca_log_aca_error_data(bank_error, type, err_data); + aca_bank_error_remove(aerr, bank_error); + } + +out_unlock: + mutex_unlock(&aerr->lock); + + return 0; +} + +static int __aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *handle, enum aca_error_type type, + struct ras_err_data *err_data) +{ + int ret; + + /* udpate aca bank to aca source error_cache first */ + ret = aca_banks_update(adev, type, handler_aca_log_bank_error, NULL); + if (ret) + return ret; + + return aca_log_aca_error(handle, type, err_data); +} + +static bool aca_handle_is_valid(struct aca_handle *handle) +{ + if (!handle->mask || !list_empty(&handle->node)) + return false; + + return true; +} + +int amdgpu_aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *handle, + enum aca_error_type type, void *data) +{ + struct ras_err_data *err_data = (struct ras_err_data *)data; + + if (!handle || !err_data) + return -EINVAL; + + if (aca_handle_is_valid(handle)) + return -EOPNOTSUPP; + + if (!(BIT(type) & handle->mask)) + return 0; + + return __aca_get_error_data(adev, handle, type, err_data); +} + +static void aca_error_init(struct aca_error *aerr, enum aca_error_type type) +{ + mutex_init(&aerr->lock); + INIT_LIST_HEAD(&aerr->list); + aerr->type = type; + aerr->nr_errors = 0; +} + +static void aca_init_error_cache(struct aca_handle *handle) +{ + struct aca_error_cache *error_cache = &handle->error_cache; + int type; + + for (type = ACA_ERROR_TYPE_UE; type < ACA_ERROR_TYPE_COUNT; type++) + aca_error_init(&error_cache->errors[type], type); +} + +static void aca_error_fini(struct aca_error *aerr) +{ + struct aca_bank_error *bank_error, *tmp; + + mutex_lock(&aerr->lock); + list_for_each_entry_safe(bank_error, tmp, &aerr->list, node) + aca_bank_error_remove(aerr, bank_error); + + mutex_destroy(&aerr->lock); +} + +static void aca_fini_error_cache(struct aca_handle *handle) +{ + struct aca_error_cache *error_cache = &handle->error_cache; + int type; + + for (type = ACA_ERROR_TYPE_UE; type < ACA_ERROR_TYPE_COUNT; type++) + aca_error_fini(&error_cache->errors[type]); +} + +static int add_aca_handle(struct amdgpu_device *adev, struct aca_handle_manager *mgr, struct aca_handle *handle, + const char *name, const struct aca_info *ras_info, void *data) +{ + memset(handle, 0, sizeof(*handle)); + + handle->adev = adev; + handle->mgr = mgr; + handle->name = name; + handle->hwip = ras_info->hwip; + handle->mask = ras_info->mask; + handle->bank_ops = ras_info->bank_ops; + handle->data = data; + aca_init_error_cache(handle); + + INIT_LIST_HEAD(&handle->node); + list_add_tail(&handle->node, &mgr->list); + mgr->nr_handles++; + + return 0; +} + +static ssize_t aca_sysfs_read(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct aca_handle *handle = container_of(attr, struct aca_handle, aca_attr); + + /* NOTE: the aca cache will be auto cleared once read, + * So the driver should unify the query entry point, forward request to ras query interface directly */ + return amdgpu_ras_aca_sysfs_read(dev, attr, handle, buf, handle->data); +} + +static int add_aca_sysfs(struct amdgpu_device *adev, struct aca_handle *handle) +{ + struct device_attribute *aca_attr = &handle->aca_attr; + + snprintf(handle->attr_name, sizeof(handle->attr_name) - 1, "aca_%s", handle->name); + aca_attr->show = aca_sysfs_read; + aca_attr->attr.name = handle->attr_name; + aca_attr->attr.mode = S_IRUGO; + sysfs_attr_init(&aca_attr->attr); + + return sysfs_add_file_to_group(&adev->dev->kobj, + &aca_attr->attr, + "ras"); +} + +int amdgpu_aca_add_handle(struct amdgpu_device *adev, struct aca_handle *handle, + const char *name, const struct aca_info *ras_info, void *data) +{ + struct amdgpu_aca *aca = &adev->aca; + int ret; + + if (!amdgpu_aca_is_enabled(adev)) + return 0; + + ret = add_aca_handle(adev, &aca->mgr, handle, name, ras_info, data); + if (ret) + return ret; + + return add_aca_sysfs(adev, handle); +} + +static void remove_aca_handle(struct aca_handle *handle) +{ + struct aca_handle_manager *mgr = handle->mgr; + + aca_fini_error_cache(handle); + list_del(&handle->node); + mgr->nr_handles--; +} + +static void remove_aca_sysfs(struct aca_handle *handle) +{ + struct amdgpu_device *adev = handle->adev; + struct device_attribute *aca_attr = &handle->aca_attr; + + if (adev->dev->kobj.sd) + sysfs_remove_file_from_group(&adev->dev->kobj, + &aca_attr->attr, + "ras"); +} + +void amdgpu_aca_remove_handle(struct aca_handle *handle) +{ + if (!handle || list_empty(&handle->node)) + return; + + remove_aca_sysfs(handle); + remove_aca_handle(handle); +} + +static int aca_manager_init(struct aca_handle_manager *mgr) +{ + INIT_LIST_HEAD(&mgr->list); + mgr->nr_handles = 0; + + return 0; +} + +static void aca_manager_fini(struct aca_handle_manager *mgr) +{ + struct aca_handle *handle, *tmp; + + list_for_each_entry_safe(handle, tmp, &mgr->list, node) + amdgpu_aca_remove_handle(handle); +} + +bool amdgpu_aca_is_enabled(struct amdgpu_device *adev) +{ + return adev->aca.is_enabled; +} + +int amdgpu_aca_init(struct amdgpu_device *adev) +{ + struct amdgpu_aca *aca = &adev->aca; + int ret; + + ret = aca_manager_init(&aca->mgr); + if (ret) + return ret; + + return 0; +} + +void amdgpu_aca_fini(struct amdgpu_device *adev) +{ + struct amdgpu_aca *aca = &adev->aca; + + aca_manager_fini(&aca->mgr); +} + +int amdgpu_aca_reset(struct amdgpu_device *adev) +{ + amdgpu_aca_fini(adev); + + return amdgpu_aca_init(adev); +} + +void amdgpu_aca_set_smu_funcs(struct amdgpu_device *adev, const struct aca_smu_funcs *smu_funcs) +{ + struct amdgpu_aca *aca = &adev->aca; + + WARN_ON(aca->smu_funcs); + aca->smu_funcs = smu_funcs; +} + +int aca_bank_info_decode(struct aca_bank *bank, struct aca_bank_info *info) +{ + u64 ipid; + u32 instidhi, instidlo; + + if (!bank || !info) + return -EINVAL; + + ipid = bank->regs[ACA_REG_IDX_IPID]; + info->hwid = ACA_REG__IPID__HARDWAREID(ipid); + info->mcatype = ACA_REG__IPID__MCATYPE(ipid); + /* + * Unfied DieID Format: SAASS. A:AID, S:Socket. + * Unfied DieID[4:4] = InstanceId[0:0] + * Unfied DieID[0:3] = InstanceIdHi[0:3] + */ + instidhi = ACA_REG__IPID__INSTANCEIDHI(ipid); + instidlo = ACA_REG__IPID__INSTANCEIDLO(ipid); + info->die_id = ((instidhi >> 2) & 0x03); + info->socket_id = ((instidlo & 0x1) << 2) | (instidhi & 0x03); + + return 0; +} + +static int aca_bank_get_error_code(struct amdgpu_device *adev, struct aca_bank *bank) +{ + int error_code; + + switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) { + case IP_VERSION(13, 0, 6): + if (!(adev->flags & AMD_IS_APU) && adev->pm.fw_version >= 0x00555600) { + error_code = ACA_REG__SYND__ERRORINFORMATION(bank->regs[ACA_REG_IDX_SYND]); + return error_code & 0xff; + } + break; + default: + break; + } + + /* NOTE: the true error code is encoded in status.errorcode[0:7] */ + error_code = ACA_REG__STATUS__ERRORCODE(bank->regs[ACA_REG_IDX_STATUS]); + + return error_code & 0xff; +} + +int aca_bank_check_error_codes(struct amdgpu_device *adev, struct aca_bank *bank, int *err_codes, int size) +{ + int i, error_code; + + if (!bank || !err_codes) + return -EINVAL; + + error_code = aca_bank_get_error_code(adev, bank); + for (i = 0; i < size; i++) { + if (err_codes[i] == error_code) + return 0; + } + + return -EINVAL; +} + +int amdgpu_aca_smu_set_debug_mode(struct amdgpu_device *adev, bool en) +{ + struct amdgpu_aca *aca = &adev->aca; + const struct aca_smu_funcs *smu_funcs = aca->smu_funcs; + + if (!smu_funcs || !smu_funcs->set_debug_mode) + return -EOPNOTSUPP; + + return smu_funcs->set_debug_mode(adev, en); +} + +#if defined(CONFIG_DEBUG_FS) +static int amdgpu_aca_smu_debug_mode_set(void *data, u64 val) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)data; + int ret; + + ret = amdgpu_ras_set_aca_debug_mode(adev, val ? true : false); + if (ret) + return ret; + + dev_info(adev->dev, "amdgpu set smu aca debug mode %s success\n", val ? "on" : "off"); + + return 0; +} + +static void aca_dump_entry(struct seq_file *m, struct aca_bank *bank, enum aca_error_type type, int idx) +{ + struct aca_bank_info info; + int i, ret; + + ret = aca_bank_info_decode(bank, &info); + if (ret) + return; + + seq_printf(m, "aca entry[%d].type: %s\n", idx, type == ACA_ERROR_TYPE_UE ? "UE" : "CE"); + seq_printf(m, "aca entry[%d].info: socketid:%d aid:%d hwid:0x%03x mcatype:0x%04x\n", + idx, info.socket_id, info.die_id, info.hwid, info.mcatype); + + for (i = 0; i < ARRAY_SIZE(aca_regs); i++) + seq_printf(m, "aca entry[%d].regs[%d]: 0x%016llx\n", idx, aca_regs[i].reg_idx, bank->regs[aca_regs[i].reg_idx]); +} + +struct aca_dump_context { + struct seq_file *m; + int idx; +}; + +static int handler_aca_bank_dump(struct aca_handle *handle, struct aca_bank *bank, + enum aca_error_type type, void *data) +{ + struct aca_dump_context *ctx = (struct aca_dump_context *)data; + + aca_dump_entry(ctx->m, bank, type, ctx->idx++); + + return handler_aca_log_bank_error(handle, bank, type, NULL); +} + +static int aca_dump_show(struct seq_file *m, enum aca_error_type type) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)m->private; + struct aca_dump_context context = { + .m = m, + .idx = 0, + }; + + return aca_banks_update(adev, type, handler_aca_bank_dump, (void *)&context); +} + +static int aca_dump_ce_show(struct seq_file *m, void *unused) +{ + return aca_dump_show(m, ACA_ERROR_TYPE_CE); +} + +static int aca_dump_ce_open(struct inode *inode, struct file *file) +{ + return single_open(file, aca_dump_ce_show, inode->i_private); +} + +static const struct file_operations aca_ce_dump_debug_fops = { + .owner = THIS_MODULE, + .open = aca_dump_ce_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int aca_dump_ue_show(struct seq_file *m, void *unused) +{ + return aca_dump_show(m, ACA_ERROR_TYPE_UE); +} + +static int aca_dump_ue_open(struct inode *inode, struct file *file) +{ + return single_open(file, aca_dump_ue_show, inode->i_private); +} + +static const struct file_operations aca_ue_dump_debug_fops = { + .owner = THIS_MODULE, + .open = aca_dump_ue_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +DEFINE_DEBUGFS_ATTRIBUTE(aca_debug_mode_fops, NULL, amdgpu_aca_smu_debug_mode_set, "%llu\n"); +#endif + +void amdgpu_aca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root) +{ +#if defined(CONFIG_DEBUG_FS) + if (!root || adev->ip_versions[MP1_HWIP][0] != IP_VERSION(13, 0, 6)) + return; + + debugfs_create_file("aca_debug_mode", 0200, root, adev, &aca_debug_mode_fops); + debugfs_create_file("aca_ue_dump", 0400, root, adev, &aca_ue_dump_debug_fops); + debugfs_create_file("aca_ce_dump", 0400, root, adev, &aca_ce_dump_debug_fops); +#endif +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h new file mode 100644 index 0000000000..2da50e0958 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h @@ -0,0 +1,202 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __AMDGPU_ACA_H__ +#define __AMDGPU_ACA_H__ + +#include <linux/list.h> + +#define ACA_MAX_REGS_COUNT (16) + +#define ACA_REG_FIELD(x, h, l) (((x) & GENMASK_ULL(h, l)) >> l) +#define ACA_REG__STATUS__VAL(x) ACA_REG_FIELD(x, 63, 63) +#define ACA_REG__STATUS__OVERFLOW(x) ACA_REG_FIELD(x, 62, 62) +#define ACA_REG__STATUS__UC(x) ACA_REG_FIELD(x, 61, 61) +#define ACA_REG__STATUS__EN(x) ACA_REG_FIELD(x, 60, 60) +#define ACA_REG__STATUS__MISCV(x) ACA_REG_FIELD(x, 59, 59) +#define ACA_REG__STATUS__ADDRV(x) ACA_REG_FIELD(x, 58, 58) +#define ACA_REG__STATUS__PCC(x) ACA_REG_FIELD(x, 57, 57) +#define ACA_REG__STATUS__ERRCOREIDVAL(x) ACA_REG_FIELD(x, 56, 56) +#define ACA_REG__STATUS__TCC(x) ACA_REG_FIELD(x, 55, 55) +#define ACA_REG__STATUS__SYNDV(x) ACA_REG_FIELD(x, 53, 53) +#define ACA_REG__STATUS__CECC(x) ACA_REG_FIELD(x, 46, 46) +#define ACA_REG__STATUS__UECC(x) ACA_REG_FIELD(x, 45, 45) +#define ACA_REG__STATUS__DEFERRED(x) ACA_REG_FIELD(x, 44, 44) +#define ACA_REG__STATUS__POISON(x) ACA_REG_FIELD(x, 43, 43) +#define ACA_REG__STATUS__SCRUB(x) ACA_REG_FIELD(x, 40, 40) +#define ACA_REG__STATUS__ERRCOREID(x) ACA_REG_FIELD(x, 37, 32) +#define ACA_REG__STATUS__ADDRLSB(x) ACA_REG_FIELD(x, 29, 24) +#define ACA_REG__STATUS__ERRORCODEEXT(x) ACA_REG_FIELD(x, 21, 16) +#define ACA_REG__STATUS__ERRORCODE(x) ACA_REG_FIELD(x, 15, 0) + +#define ACA_REG__IPID__MCATYPE(x) ACA_REG_FIELD(x, 63, 48) +#define ACA_REG__IPID__INSTANCEIDHI(x) ACA_REG_FIELD(x, 47, 44) +#define ACA_REG__IPID__HARDWAREID(x) ACA_REG_FIELD(x, 43, 32) +#define ACA_REG__IPID__INSTANCEIDLO(x) ACA_REG_FIELD(x, 31, 0) + +#define ACA_REG__MISC0__VALID(x) ACA_REG_FIELD(x, 63, 63) +#define ACA_REG__MISC0__OVRFLW(x) ACA_REG_FIELD(x, 48, 48) +#define ACA_REG__MISC0__ERRCNT(x) ACA_REG_FIELD(x, 43, 32) + +#define ACA_REG__SYND__ERRORINFORMATION(x) ACA_REG_FIELD(x, 17, 0) + +/* NOTE: The following codes refers to the smu header file */ +#define ACA_EXTERROR_CODE_CE 0x3a +#define ACA_EXTERROR_CODE_FAULT 0x3b + +#define ACA_ERROR_UE_MASK BIT_MASK(ACA_ERROR_TYPE_UE) +#define ACA_ERROR_CE_MASK BIT_MASK(ACA_ERROR_TYPE_CE) +#define ACA_ERROR_DEFERRED_MASK BIT_MASK(ACA_ERROR_TYPE_DEFERRED) + +enum aca_reg_idx { + ACA_REG_IDX_CTL = 0, + ACA_REG_IDX_STATUS = 1, + ACA_REG_IDX_ADDR = 2, + ACA_REG_IDX_MISC0 = 3, + ACA_REG_IDX_CONFG = 4, + ACA_REG_IDX_IPID = 5, + ACA_REG_IDX_SYND = 6, + ACA_REG_IDX_DESTAT = 8, + ACA_REG_IDX_DEADDR = 9, + ACA_REG_IDX_CTL_MASK = 10, + ACA_REG_IDX_COUNT = 16, +}; + +enum aca_hwip_type { + ACA_HWIP_TYPE_UNKNOW = -1, + ACA_HWIP_TYPE_PSP = 0, + ACA_HWIP_TYPE_UMC, + ACA_HWIP_TYPE_SMU, + ACA_HWIP_TYPE_PCS_XGMI, + ACA_HWIP_TYPE_COUNT, +}; + +enum aca_error_type { + ACA_ERROR_TYPE_INVALID = -1, + ACA_ERROR_TYPE_UE = 0, + ACA_ERROR_TYPE_CE, + ACA_ERROR_TYPE_DEFERRED, + ACA_ERROR_TYPE_COUNT +}; + +struct aca_bank { + u64 regs[ACA_MAX_REGS_COUNT]; +}; + +struct aca_bank_node { + struct aca_bank bank; + struct list_head node; +}; + +struct aca_bank_info { + int die_id; + int socket_id; + int hwid; + int mcatype; +}; + +struct aca_bank_report { + struct aca_bank_info info; + u64 count[ACA_ERROR_TYPE_COUNT]; +}; + +struct aca_bank_error { + struct list_head node; + struct aca_bank_info info; + u64 count[ACA_ERROR_TYPE_COUNT]; +}; + +struct aca_error { + struct list_head list; + struct mutex lock; + enum aca_error_type type; + int nr_errors; +}; + +struct aca_handle_manager { + struct list_head list; + int nr_handles; +}; + +struct aca_error_cache { + struct aca_error errors[ACA_ERROR_TYPE_COUNT]; +}; + +struct aca_handle { + struct list_head node; + enum aca_hwip_type hwip; + struct amdgpu_device *adev; + struct aca_handle_manager *mgr; + struct aca_error_cache error_cache; + const struct aca_bank_ops *bank_ops; + struct device_attribute aca_attr; + char attr_name[64]; + const char *name; + u32 mask; + void *data; +}; + +struct aca_bank_ops { + int (*aca_bank_generate_report)(struct aca_handle *handle, struct aca_bank *bank, enum aca_error_type type, + struct aca_bank_report *report, void *data); + bool (*aca_bank_is_valid)(struct aca_handle *handle, struct aca_bank *bank, enum aca_error_type type, + void *data); +}; + +struct aca_smu_funcs { + int max_ue_bank_count; + int max_ce_bank_count; + int (*set_debug_mode)(struct amdgpu_device *adev, bool enable); + int (*get_valid_aca_count)(struct amdgpu_device *adev, enum aca_error_type type, u32 *count); + int (*get_valid_aca_bank)(struct amdgpu_device *adev, enum aca_error_type type, int idx, struct aca_bank *bank); +}; + +struct amdgpu_aca { + struct aca_handle_manager mgr; + const struct aca_smu_funcs *smu_funcs; + bool is_enabled; +}; + +struct aca_info { + enum aca_hwip_type hwip; + const struct aca_bank_ops *bank_ops; + u32 mask; +}; + +int amdgpu_aca_init(struct amdgpu_device *adev); +void amdgpu_aca_fini(struct amdgpu_device *adev); +int amdgpu_aca_reset(struct amdgpu_device *adev); +void amdgpu_aca_set_smu_funcs(struct amdgpu_device *adev, const struct aca_smu_funcs *smu_funcs); +bool amdgpu_aca_is_enabled(struct amdgpu_device *adev); + +int aca_bank_info_decode(struct aca_bank *bank, struct aca_bank_info *info); +int aca_bank_check_error_codes(struct amdgpu_device *adev, struct aca_bank *bank, int *err_codes, int size); + +int amdgpu_aca_add_handle(struct amdgpu_device *adev, struct aca_handle *handle, + const char *name, const struct aca_info *aca_info, void *data); +void amdgpu_aca_remove_handle(struct aca_handle *handle); +int amdgpu_aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *handle, + enum aca_error_type type, void *data); +int amdgpu_aca_smu_set_debug_mode(struct amdgpu_device *adev, bool en); +void amdgpu_aca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root); +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 131983ed43..35dd6effa9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -742,9 +742,15 @@ void amdgpu_amdkfd_debug_mem_fence(struct amdgpu_device *adev) amdgpu_device_flush_hdp(adev, NULL); } -void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev, bool reset) +bool amdgpu_amdkfd_is_fed(struct amdgpu_device *adev) { - amdgpu_umc_poison_handler(adev, reset); + return amdgpu_ras_get_fed_status(adev); +} + +void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev, + enum amdgpu_ras_block block, bool reset) +{ + amdgpu_umc_poison_handler(adev, block, reset); } int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 27c61c535e..0ef223c2af 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -193,6 +193,9 @@ struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f); int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo); int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni, unsigned long cur_seq, struct kgd_mem *mem); +int amdgpu_amdkfd_bo_validate_and_fence(struct amdgpu_bo *bo, + uint32_t domain, + struct dma_fence *fence); #else static inline bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm) @@ -218,6 +221,13 @@ int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni, { return 0; } +static inline +int amdgpu_amdkfd_bo_validate_and_fence(struct amdgpu_bo *bo, + uint32_t domain, + struct dma_fence *fence) +{ + return 0; +} #endif /* Shared API */ int amdgpu_amdkfd_alloc_gtt_mem(struct amdgpu_device *adev, size_t size, @@ -310,7 +320,7 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_mem *mem, void **kptr, uint64_t *size); void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_mem *mem); -int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_bo *bo); +int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_bo *bo); int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info, struct dma_fence __rcu **ef); @@ -326,7 +336,8 @@ void amdgpu_amdkfd_debug_mem_fence(struct amdgpu_device *adev); int amdgpu_amdkfd_get_tile_config(struct amdgpu_device *adev, struct tile_config *config); void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev, - bool reset); + enum amdgpu_ras_block block, bool reset); +bool amdgpu_amdkfd_is_fed(struct amdgpu_device *adev); bool amdgpu_amdkfd_bo_mapped_to_dev(struct amdgpu_device *adev, struct kgd_mem *mem); void amdgpu_amdkfd_block_mmu_notifications(void *p); int amdgpu_amdkfd_criu_resume(void *p); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index b1e2dd52e6..0535b07987 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -426,9 +426,9 @@ validate_fail: return ret; } -static int amdgpu_amdkfd_bo_validate_and_fence(struct amdgpu_bo *bo, - uint32_t domain, - struct dma_fence *fence) +int amdgpu_amdkfd_bo_validate_and_fence(struct amdgpu_bo *bo, + uint32_t domain, + struct dma_fence *fence) { int ret = amdgpu_bo_reserve(bo, false); @@ -464,13 +464,15 @@ static int amdgpu_amdkfd_validate_vm_bo(void *_unused, struct amdgpu_bo *bo) * again. Page directories are only updated after updating page * tables. */ -static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm) +static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm, + struct ww_acquire_ctx *ticket) { struct amdgpu_bo *pd = vm->root.bo; struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev); int ret; - ret = amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_amdkfd_validate_vm_bo, NULL); + ret = amdgpu_vm_validate(adev, vm, ticket, + amdgpu_amdkfd_validate_vm_bo, NULL); if (ret) { pr_err("failed to validate PT BOs\n"); return ret; @@ -1186,7 +1188,8 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem, int ret; ctx->sync = &mem->sync; - drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); + drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT | + DRM_EXEC_IGNORE_DUPLICATES, 0); drm_exec_until_all_locked(&ctx->exec) { ctx->n_vms = 0; list_for_each_entry(entry, &mem->attachments, list) { @@ -1310,14 +1313,15 @@ update_gpuvm_pte_failed: return ret; } -static int process_validate_vms(struct amdkfd_process_info *process_info) +static int process_validate_vms(struct amdkfd_process_info *process_info, + struct ww_acquire_ctx *ticket) { struct amdgpu_vm *peer_vm; int ret; list_for_each_entry(peer_vm, &process_info->vm_list_head, vm_list_node) { - ret = vm_validate_pt_pd_bos(peer_vm); + ret = vm_validate_pt_pd_bos(peer_vm, ticket); if (ret) return ret; } @@ -1402,7 +1406,7 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, ret = amdgpu_bo_reserve(vm->root.bo, true); if (ret) goto reserve_pd_fail; - ret = vm_validate_pt_pd_bos(vm); + ret = vm_validate_pt_pd_bos(vm, NULL); if (ret) { pr_err("validate_pt_pd_bos() failed\n"); goto validate_pd_fail; @@ -2044,7 +2048,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( bo->tbo.resource->mem_type == TTM_PL_SYSTEM) is_invalid_userptr = true; - ret = vm_validate_pt_pd_bos(avm); + ret = vm_validate_pt_pd_bos(avm, NULL); if (unlikely(ret)) goto out_unreserve; @@ -2137,7 +2141,7 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( goto unreserve_out; } - ret = vm_validate_pt_pd_bos(avm); + ret = vm_validate_pt_pd_bos(avm, NULL); if (unlikely(ret)) goto unreserve_out; @@ -2187,13 +2191,12 @@ int amdgpu_amdkfd_gpuvm_sync_memory( /** * amdgpu_amdkfd_map_gtt_bo_to_gart - Map BO to GART and increment reference count - * @adev: Device to which allocated BO belongs * @bo: Buffer object to be mapped * * Before return, bo reference count is incremented. To release the reference and unpin/ * unmap the BO, call amdgpu_amdkfd_free_gtt_mem. */ -int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_bo *bo) +int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_bo *bo) { int ret; @@ -2635,7 +2638,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) } } - ret = process_validate_vms(process_info); + ret = process_validate_vms(process_info, NULL); if (ret) goto unreserve_out; @@ -2868,14 +2871,16 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu * mutex_lock(&process_info->lock); - drm_exec_init(&exec, 0, 0); + drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0); drm_exec_until_all_locked(&exec) { list_for_each_entry(peer_vm, &process_info->vm_list_head, vm_list_node) { ret = amdgpu_vm_lock_pd(peer_vm, &exec, 2); drm_exec_retry_on_contention(&exec); - if (unlikely(ret)) + if (unlikely(ret)) { + pr_err("Locking VM PD failed, ret: %d\n", ret); goto ttm_reserve_fail; + } } /* Reserve all BOs and page tables/directory. Add all BOs from @@ -2888,25 +2893,21 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu * gobj = &mem->bo->tbo.base; ret = drm_exec_prepare_obj(&exec, gobj, 1); drm_exec_retry_on_contention(&exec); - if (unlikely(ret)) + if (unlikely(ret)) { + pr_err("drm_exec_prepare_obj failed, ret: %d\n", ret); goto ttm_reserve_fail; + } } } amdgpu_sync_create(&sync_obj); - /* Validate PDs and PTs */ - ret = process_validate_vms(process_info); - if (ret) - goto validate_map_fail; - - /* Validate BOs and map them to GPUVM (update VM page tables). */ + /* Validate BOs managed by KFD */ list_for_each_entry(mem, &process_info->kfd_bo_list, validate_list) { struct amdgpu_bo *bo = mem->bo; uint32_t domain = mem->domain; - struct kfd_mem_attachment *attachment; struct dma_resv_iter cursor; struct dma_fence *fence; @@ -2931,6 +2932,25 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu * goto validate_map_fail; } } + } + + if (failed_size) + pr_debug("0x%lx/0x%lx in system\n", failed_size, total_size); + + /* Validate PDs, PTs and evicted DMABuf imports last. Otherwise BO + * validations above would invalidate DMABuf imports again. + */ + ret = process_validate_vms(process_info, &exec.ticket); + if (ret) { + pr_debug("Validating VMs failed, ret: %d\n", ret); + goto validate_map_fail; + } + + /* Update mappings managed by KFD. */ + list_for_each_entry(mem, &process_info->kfd_bo_list, + validate_list) { + struct kfd_mem_attachment *attachment; + list_for_each_entry(attachment, &mem->attachments, list) { if (!attachment->is_mapped) continue; @@ -2947,9 +2967,6 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu * } } - if (failed_size) - pr_debug("0x%lx/0x%lx in system\n", failed_size, total_size); - /* Update mappings not managed by KFD */ list_for_each_entry(peer_vm, &process_info->vm_list_head, vm_list_node) { @@ -3021,7 +3038,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu * &process_info->eviction_fence->base, DMA_RESV_USAGE_BOOKKEEP); } - /* Attach eviction fence to PD / PT BOs */ + /* Attach eviction fence to PD / PT BOs and DMABuf imports */ list_for_each_entry(peer_vm, &process_info->vm_list_head, vm_list_node) { struct amdgpu_bo *bo = peer_vm->root.bo; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c index dce9e7d5e4..52b12c1718 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c @@ -1018,7 +1018,8 @@ int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev, if (clock_type == COMPUTE_ENGINE_PLL_PARAM) { args.v3.ulClockParams = cpu_to_le32((clock_type << 24) | clock); - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, + sizeof(args)); dividers->post_div = args.v3.ucPostDiv; dividers->enable_post_div = (args.v3.ucCntlFlag & @@ -1038,7 +1039,8 @@ int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev, if (strobe_mode) args.v5.ucInputFlag = ATOM_PLL_INPUT_FLAG_PLL_STROBE_MODE_EN; - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, + sizeof(args)); dividers->post_div = args.v5.ucPostDiv; dividers->enable_post_div = (args.v5.ucCntlFlag & @@ -1056,7 +1058,8 @@ int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev, /* fusion */ args.v4.ulClock = cpu_to_le32(clock); /* 10 khz */ - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, + sizeof(args)); dividers->post_divider = dividers->post_div = args.v4.ucPostDiv; dividers->real_clock = le32_to_cpu(args.v4.ulClock); @@ -1067,7 +1070,8 @@ int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev, args.v6_in.ulClock.ulComputeClockFlag = clock_type; args.v6_in.ulClock.ulClockFreq = cpu_to_le32(clock); /* 10 khz */ - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, + sizeof(args)); dividers->whole_fb_div = le16_to_cpu(args.v6_out.ulFbDiv.usFbDiv); dividers->frac_fb_div = le16_to_cpu(args.v6_out.ulFbDiv.usFbDivFrac); @@ -1109,7 +1113,8 @@ int amdgpu_atombios_get_memory_pll_dividers(struct amdgpu_device *adev, if (strobe_mode) args.ucInputFlag |= MPLL_INPUT_FLAG_STROBE_MODE_EN; - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, + sizeof(args)); mpll_param->clkfrac = le16_to_cpu(args.ulFbDiv.usFbDivFrac); mpll_param->clkf = le16_to_cpu(args.ulFbDiv.usFbDiv); @@ -1151,7 +1156,8 @@ void amdgpu_atombios_set_engine_dram_timings(struct amdgpu_device *adev, if (mem_clock) args.sReserved.ulClock = cpu_to_le32(mem_clock & SET_CLOCK_FREQ_MASK); - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, + sizeof(args)); } void amdgpu_atombios_get_default_voltages(struct amdgpu_device *adev, @@ -1205,7 +1211,8 @@ int amdgpu_atombios_get_max_vddc(struct amdgpu_device *adev, u8 voltage_type, args.v2.ucVoltageMode = 0; args.v2.usVoltageLevel = 0; - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, + sizeof(args)); *voltage = le16_to_cpu(args.v2.usVoltageLevel); break; @@ -1214,7 +1221,8 @@ int amdgpu_atombios_get_max_vddc(struct amdgpu_device *adev, u8 voltage_type, args.v3.ucVoltageMode = ATOM_GET_VOLTAGE_LEVEL; args.v3.usVoltageLevel = cpu_to_le16(voltage_id); - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, + sizeof(args)); *voltage = le16_to_cpu(args.v3.usVoltageLevel); break; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c index fb2681dd6b..c8c23dbb90 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c @@ -211,6 +211,7 @@ union igp_info { struct atom_integrated_system_info_v1_11 v11; struct atom_integrated_system_info_v1_12 v12; struct atom_integrated_system_info_v2_1 v21; + struct atom_integrated_system_info_v2_3 v23; }; union umc_info { @@ -359,6 +360,20 @@ amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev, if (vram_type) *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type); break; + case 3: + mem_channel_number = igp_info->v23.umachannelnumber; + if (!mem_channel_number) + mem_channel_number = 1; + mem_type = igp_info->v23.memorytype; + if (mem_type == LpDdr5MemType) + mem_channel_width = 32; + else + mem_channel_width = 64; + if (vram_width) + *vram_width = mem_channel_number * mem_channel_width; + if (vram_type) + *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type); + break; default: return -EINVAL; } @@ -941,5 +956,6 @@ int amdgpu_atomfirmware_asic_init(struct amdgpu_device *adev, bool fb_reset) return -EINVAL; } - return amdgpu_atom_execute_table(ctx, ATOM_CMD_INIT, (uint32_t *)&asic_init_ps_v2_1); + return amdgpu_atom_execute_table(ctx, ATOM_CMD_INIT, (uint32_t *)&asic_init_ps_v2_1, + sizeof(asic_init_ps_v2_1)); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h index c7eb2caec6..649b5530d8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h @@ -36,7 +36,7 @@ int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev); int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev); bool amdgpu_atomfirmware_mem_ecc_supported(struct amdgpu_device *adev); bool amdgpu_atomfirmware_sram_ecc_supported(struct amdgpu_device *adev); -bool amdgpu_atomfirmware_ras_rom_addr(struct amdgpu_device *adev, uint8_t* i2c_address); +bool amdgpu_atomfirmware_ras_rom_addr(struct amdgpu_device *adev, uint8_t *i2c_address); bool amdgpu_atomfirmware_mem_training_supported(struct amdgpu_device *adev); bool amdgpu_atomfirmware_dynamic_boot_config_supported(struct amdgpu_device *adev); int amdgpu_atomfirmware_get_fw_reserved_fb_size(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 116d3756c9..ec888fc6ea 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -952,10 +952,10 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, p->bytes_moved = 0; p->bytes_moved_vis = 0; - r = amdgpu_vm_validate_pt_bos(p->adev, &fpriv->vm, - amdgpu_cs_bo_validate, p); + r = amdgpu_vm_validate(p->adev, &fpriv->vm, NULL, + amdgpu_cs_bo_validate, p); if (r) { - DRM_ERROR("amdgpu_vm_validate_pt_bos() failed.\n"); + DRM_ERROR("amdgpu_vm_validate() failed.\n"); goto out_free_user_pages; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c index 796fa6f142..cfdf558b48 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c @@ -28,9 +28,8 @@ uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev) { - uint64_t addr = adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT; + uint64_t addr = AMDGPU_VA_RESERVED_CSA_START(adev); - addr -= AMDGPU_VA_RESERVED_SIZE; addr = amdgpu_gmc_sign_extend(addr); return addr; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 1afbb2e932..f5d0fa207a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -1782,9 +1782,14 @@ static int amdgpu_debugfs_vm_info_show(struct seq_file *m, void *unused) list_for_each_entry(file, &dev->filelist, lhead) { struct amdgpu_fpriv *fpriv = file->driver_priv; struct amdgpu_vm *vm = &fpriv->vm; + struct amdgpu_task_info *ti; + + ti = amdgpu_vm_get_task_info_vm(vm); + if (ti) { + seq_printf(m, "pid:%d\tProcess:%s ----------\n", ti->pid, ti->process_name); + amdgpu_vm_put_task_info(ti); + } - seq_printf(m, "pid:%d\tProcess:%s ----------\n", - vm->task_info.pid, vm->task_info.process_name); r = amdgpu_bo_reserve(vm->root.bo, true); if (r) break; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 14d8786755..941d6e379b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -96,6 +96,9 @@ MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin"); #define AMDGPU_RESUME_MS 2000 #define AMDGPU_MAX_RETRY_LIMIT 2 #define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL) +#define AMDGPU_PCIE_INDEX_FALLBACK (0x38 >> 2) +#define AMDGPU_PCIE_INDEX_HI_FALLBACK (0x44 >> 2) +#define AMDGPU_PCIE_DATA_FALLBACK (0x3C >> 2) static const struct drm_driver amdgpu_kms_driver; @@ -781,12 +784,22 @@ u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev, void __iomem *pcie_index_hi_offset; void __iomem *pcie_data_offset; - pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev); - pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev); - if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset)) - pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev); - else + if (unlikely(!adev->nbio.funcs)) { + pcie_index = AMDGPU_PCIE_INDEX_FALLBACK; + pcie_data = AMDGPU_PCIE_DATA_FALLBACK; + } else { + pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev); + pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev); + } + + if (reg_addr >> 32) { + if (unlikely(!adev->nbio.funcs)) + pcie_index_hi = AMDGPU_PCIE_INDEX_HI_FALLBACK; + else + pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev); + } else { pcie_index_hi = 0; + } spin_lock_irqsave(&adev->pcie_idx_lock, flags); pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4; @@ -1218,8 +1231,6 @@ static int amdgpu_device_asic_init(struct amdgpu_device *adev) amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) { amdgpu_psp_wait_for_bootloader(adev); ret = amdgpu_atomfirmware_asic_init(adev, true); - /* TODO: check the return val and stop device initialization if boot fails */ - amdgpu_psp_query_boot_status(adev); return ret; } else { return amdgpu_atom_asic_init(adev->mode_info.atom_context); @@ -1442,6 +1453,10 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev) if (amdgpu_sriov_vf(adev)) return 0; + /* PCI_EXT_CAP_ID_VNDR extended capability is located at 0x100 */ + if (!pci_find_ext_capability(adev->pdev, PCI_EXT_CAP_ID_VNDR)) + DRM_WARN("System can't access extended configuration space,please check!!\n"); + /* skip if the bios has already enabled large BAR */ if (adev->gmc.real_vram_size && (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size)) @@ -4039,13 +4054,13 @@ int amdgpu_device_init(struct amdgpu_device *adev, return r; } + amdgpu_device_set_mcbp(adev); + /* early init functions */ r = amdgpu_device_ip_early_init(adev); if (r) return r; - amdgpu_device_set_mcbp(adev); - /* Get rid of things like offb */ r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver); if (r) @@ -5312,6 +5327,7 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, list_for_each_entry(tmp_adev, device_list_handle, reset_list) { if (need_full_reset) { /* post card */ + amdgpu_ras_set_fed(tmp_adev, false); r = amdgpu_device_asic_init(tmp_adev); if (r) { dev_warn(tmp_adev->dev, "asic atom init failed!"); @@ -5692,6 +5708,7 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */ /* Aldebaran and gfx_11_0_3 support ras in SRIOV, so need resume ras during reset */ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3)) amdgpu_ras_resume(adev); } else { @@ -5792,13 +5809,18 @@ static void amdgpu_device_partner_bandwidth(struct amdgpu_device *adev, *speed = PCI_SPEED_UNKNOWN; *width = PCIE_LNK_WIDTH_UNKNOWN; - while ((parent = pci_upstream_bridge(parent))) { - /* skip upstream/downstream switches internal to dGPU*/ - if (parent->vendor == PCI_VENDOR_ID_ATI) - continue; - *speed = pcie_get_speed_cap(parent); - *width = pcie_get_width_cap(parent); - break; + if (amdgpu_device_pcie_dynamic_switching_supported(adev)) { + while ((parent = pci_upstream_bridge(parent))) { + /* skip upstream/downstream switches internal to dGPU*/ + if (parent->vendor == PCI_VENDOR_ID_ATI) + continue; + *speed = pcie_get_speed_cap(parent); + *width = pcie_get_width_cap(parent); + break; + } + } else { + /* use the current speeds rather than max if switching is not supported */ + pcie_bandwidth_available(adev->pdev, NULL, speed, width); } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index ff28265838..ac5bf01fe8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -27,6 +27,7 @@ #include "amdgpu_discovery.h" #include "soc15_hw_ip.h" #include "discovery.h" +#include "amdgpu_ras.h" #include "soc15.h" #include "gfx_v9_0.h" @@ -60,20 +61,24 @@ #include "nbio_v4_3.h" #include "nbio_v7_2.h" #include "nbio_v7_7.h" +#include "nbif_v6_3_1.h" #include "hdp_v5_0.h" #include "hdp_v5_2.h" #include "hdp_v6_0.h" +#include "hdp_v7_0.h" #include "nv.h" #include "soc21.h" #include "navi10_ih.h" #include "ih_v6_0.h" #include "ih_v6_1.h" +#include "ih_v7_0.h" #include "gfx_v10_0.h" #include "gfx_v11_0.h" #include "sdma_v5_0.h" #include "sdma_v5_2.h" #include "sdma_v6_0.h" #include "lsdma_v6_0.h" +#include "lsdma_v7_0.h" #include "vcn_v2_0.h" #include "jpeg_v2_0.h" #include "vcn_v3_0.h" @@ -92,12 +97,15 @@ #include "smuio_v13_0.h" #include "smuio_v13_0_3.h" #include "smuio_v13_0_6.h" +#include "vcn_v5_0_0.h" +#include "jpeg_v5_0_0.h" #include "amdgpu_vpe.h" #define FIRMWARE_IP_DISCOVERY "amdgpu/ip_discovery.bin" MODULE_FIRMWARE(FIRMWARE_IP_DISCOVERY); +#define mmIP_DISCOVERY_VERSION 0x16A00 #define mmRCC_CONFIG_MEMSIZE 0xde3 #define mmMP0_SMN_C2PMSG_33 0x16061 #define mmMM_INDEX 0x0 @@ -518,7 +526,9 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev) out: kfree(adev->mman.discovery_bin); adev->mman.discovery_bin = NULL; - + if ((amdgpu_discovery != 2) && + (RREG32(mmIP_DISCOVERY_VERSION) == 4)) + amdgpu_ras_query_boot_status(adev, 4); return r; } @@ -1310,6 +1320,15 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) } } + if (le16_to_cpu(ip->hw_id) == VPE_HWID) { + if (adev->vpe.num_instances < AMDGPU_MAX_VPE_INSTANCES) + adev->vpe.num_instances++; + else + dev_err(adev->dev, "Too many VPE instances: %d vs %d\n", + adev->vpe.num_instances + 1, + AMDGPU_MAX_VPE_INSTANCES); + } + if (le16_to_cpu(ip->hw_id) == UMC_HWID) { adev->gmc.num_umc++; adev->umc.node_inst_num++; @@ -1674,6 +1693,7 @@ static int amdgpu_discovery_set_common_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 0, 3): case IP_VERSION(11, 0, 4): case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 1): amdgpu_device_ip_block_add(adev, &soc21_common_ip_block); break; default: @@ -1721,6 +1741,7 @@ static int amdgpu_discovery_set_gmc_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 0, 3): case IP_VERSION(11, 0, 4): case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 1): amdgpu_device_ip_block_add(adev, &gmc_v11_0_ip_block); break; default: @@ -1763,6 +1784,9 @@ static int amdgpu_discovery_set_ih_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(6, 1, 0): amdgpu_device_ip_block_add(adev, &ih_v6_1_ip_block); break; + case IP_VERSION(7, 0, 0): + amdgpu_device_ip_block_add(adev, &ih_v7_0_ip_block); + break; default: dev_err(adev->dev, "Failed to add ih ip block(OSSSYS_HWIP:0x%x)\n", @@ -1812,11 +1836,16 @@ static int amdgpu_discovery_set_psp_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(13, 0, 10): case IP_VERSION(13, 0, 11): case IP_VERSION(14, 0, 0): + case IP_VERSION(14, 0, 1): amdgpu_device_ip_block_add(adev, &psp_v13_0_ip_block); break; case IP_VERSION(13, 0, 4): amdgpu_device_ip_block_add(adev, &psp_v13_0_4_ip_block); break; + case IP_VERSION(14, 0, 2): + case IP_VERSION(14, 0, 3): + amdgpu_device_ip_block_add(adev, &psp_v14_0_ip_block); + break; default: dev_err(adev->dev, "Failed to add psp ip block(MP0_HWIP:0x%x)\n", @@ -1918,6 +1947,7 @@ static int amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(3, 2, 0): case IP_VERSION(3, 2, 1): case IP_VERSION(3, 5, 0): + case IP_VERSION(3, 5, 1): if (amdgpu_sriov_vf(adev)) amdgpu_discovery_set_sriov_display(adev); else @@ -1987,6 +2017,7 @@ static int amdgpu_discovery_set_gc_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 0, 3): case IP_VERSION(11, 0, 4): case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 1): amdgpu_device_ip_block_add(adev, &gfx_v11_0_ip_block); break; default: @@ -2034,6 +2065,7 @@ static int amdgpu_discovery_set_sdma_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(6, 0, 2): case IP_VERSION(6, 0, 3): case IP_VERSION(6, 1, 0): + case IP_VERSION(6, 1, 1): amdgpu_device_ip_block_add(adev, &sdma_v6_0_ip_block); break; default: @@ -2120,9 +2152,14 @@ static int amdgpu_discovery_set_mm_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &jpeg_v4_0_3_ip_block); break; case IP_VERSION(4, 0, 5): + case IP_VERSION(4, 0, 6): amdgpu_device_ip_block_add(adev, &vcn_v4_0_5_ip_block); amdgpu_device_ip_block_add(adev, &jpeg_v4_0_5_ip_block); break; + case IP_VERSION(5, 0, 0): + amdgpu_device_ip_block_add(adev, &vcn_v5_0_0_ip_block); + amdgpu_device_ip_block_add(adev, &jpeg_v5_0_0_ip_block); + break; default: dev_err(adev->dev, "Failed to add vcn/jpeg ip block(UVD_HWIP:0x%x)\n", @@ -2161,6 +2198,7 @@ static int amdgpu_discovery_set_mes_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 0, 3): case IP_VERSION(11, 0, 4): case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 1): amdgpu_device_ip_block_add(adev, &mes_v11_0_ip_block); adev->enable_mes = true; adev->enable_mes_kiq = true; @@ -2186,6 +2224,7 @@ static int amdgpu_discovery_set_vpe_ip_blocks(struct amdgpu_device *adev) { switch (amdgpu_ip_version(adev, VPE_HWIP, 0)) { case IP_VERSION(6, 1, 0): + case IP_VERSION(6, 1, 1): amdgpu_device_ip_block_add(adev, &vpe_v6_1_ip_block); break; default: @@ -2199,6 +2238,7 @@ static int amdgpu_discovery_set_umsch_mm_ip_blocks(struct amdgpu_device *adev) { switch (amdgpu_ip_version(adev, VCN_HWIP, 0)) { case IP_VERSION(4, 0, 5): + case IP_VERSION(4, 0, 6): if (amdgpu_umsch_mm & 0x1) { amdgpu_device_ip_block_add(adev, &umsch_mm_v4_0_ip_block); adev->enable_umsch_mm = true; @@ -2439,6 +2479,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) adev->family = AMDGPU_FAMILY_GC_11_0_1; break; case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 1): adev->family = AMDGPU_FAMILY_GC_11_5_0; break; default: @@ -2458,6 +2499,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 4): case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 1): adev->flags |= AMD_IS_APU; break; default: @@ -2494,6 +2536,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) adev->nbio.hdp_flush_reg = &nbio_v7_9_hdp_flush_reg; break; case IP_VERSION(7, 11, 0): + case IP_VERSION(7, 11, 1): adev->nbio.funcs = &nbio_v7_11_funcs; adev->nbio.hdp_flush_reg = &nbio_v7_11_hdp_flush_reg; break; @@ -2529,6 +2572,10 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) adev->nbio.funcs = &nbio_v7_7_funcs; adev->nbio.hdp_flush_reg = &nbio_v7_7_hdp_flush_reg; break; + case IP_VERSION(6, 3, 1): + adev->nbio.funcs = &nbif_v6_3_1_funcs; + adev->nbio.hdp_flush_reg = &nbif_v6_3_1_hdp_flush_reg; + break; default: break; } @@ -2561,6 +2608,9 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(6, 1, 0): adev->hdp.funcs = &hdp_v6_0_funcs; break; + case IP_VERSION(7, 0, 0): + adev->hdp.funcs = &hdp_v7_0_funcs; + break; default: break; } @@ -2625,6 +2675,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(13, 0, 6): case IP_VERSION(13, 0, 8): case IP_VERSION(14, 0, 0): + case IP_VERSION(14, 0, 1): adev->smuio.funcs = &smuio_v13_0_6_funcs; break; default: @@ -2638,6 +2689,10 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(6, 0, 3): adev->lsdma.funcs = &lsdma_v6_0_funcs; break; + case IP_VERSION(7, 0, 0): + case IP_VERSION(7, 0, 1): + adev->lsdma.funcs = &lsdma_v7_0_funcs; + break; default: break; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index b8fbe97efe..3ecc7ef951 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -1350,14 +1350,6 @@ int amdgpu_display_modeset_create_props(struct amdgpu_device *adev) "dither", amdgpu_dither_enum_list, sz); - if (adev->dc_enabled) { - adev->mode_info.abm_level_property = - drm_property_create_range(adev_to_drm(adev), 0, - "abm level", 0, 4); - if (!adev->mode_info.abm_level_property) - return -ENOMEM; - } - return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index decbbe3d4f..055ba2ea4c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -377,6 +377,10 @@ amdgpu_dma_buf_move_notify(struct dma_buf_attachment *attach) struct amdgpu_vm_bo_base *bo_base; int r; + /* FIXME: This should be after the "if", but needs a fix to make sure + * DMABuf imports are initialized in the right VM list. + */ + amdgpu_vm_bo_invalidate(adev, bo, false); if (!bo->tbo.resource || bo->tbo.resource->mem_type == TTM_PL_SYSTEM) return; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 64b1bb2404..e4277298cf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -195,10 +195,12 @@ int amdgpu_async_gfx_ring = 1; int amdgpu_mcbp = -1; int amdgpu_discovery = -1; int amdgpu_mes; +int amdgpu_mes_log_enable = 0; int amdgpu_mes_kiq; int amdgpu_noretry = -1; int amdgpu_force_asic_type = -1; int amdgpu_tmz = -1; /* auto */ +uint amdgpu_freesync_vid_mode; int amdgpu_reset_method = -1; /* auto */ int amdgpu_num_kcq = -1; int amdgpu_smartshift_bias; @@ -367,7 +369,7 @@ module_param_named(aspm, amdgpu_aspm, int, 0444); * Setting the value to 0 disables this functionality. * Setting the value to -2 is auto enabled with power down when displays are attached. */ -MODULE_PARM_DESC(runpm, "PX runtime pm (2 = force enable with BAMACO, 1 = force enable with BACO, 0 = disable, -1 = auto, -2 = autowith displays)"); +MODULE_PARM_DESC(runpm, "PX runtime pm (2 = force enable with BAMACO, 1 = force enable with BACO, 0 = disable, -1 = auto, -2 = auto with displays)"); module_param_named(runpm, amdgpu_runtime_pm, int, 0444); /** @@ -594,7 +596,7 @@ module_param_named(timeout_period, amdgpu_watchdog_timer.period, uint, 0644); #ifdef CONFIG_DRM_AMDGPU_SI #if IS_ENABLED(CONFIG_DRM_RADEON) || IS_ENABLED(CONFIG_DRM_RADEON_MODULE) -int amdgpu_si_support = 0; +int amdgpu_si_support; MODULE_PARM_DESC(si_support, "SI support (1 = enabled, 0 = disabled (default))"); #else int amdgpu_si_support = 1; @@ -613,7 +615,7 @@ module_param_named(si_support, amdgpu_si_support, int, 0444); #ifdef CONFIG_DRM_AMDGPU_CIK #if IS_ENABLED(CONFIG_DRM_RADEON) || IS_ENABLED(CONFIG_DRM_RADEON_MODULE) -int amdgpu_cik_support = 0; +int amdgpu_cik_support; MODULE_PARM_DESC(cik_support, "CIK support (1 = enabled, 0 = disabled (default))"); #else int amdgpu_cik_support = 1; @@ -667,6 +669,15 @@ MODULE_PARM_DESC(mes, module_param_named(mes, amdgpu_mes, int, 0444); /** + * DOC: mes_log_enable (int) + * Enable Micro Engine Scheduler log. This is used to enable/disable MES internal log. + * (0 = disabled (default), 1 = enabled) + */ +MODULE_PARM_DESC(mes_log_enable, + "Enable Micro Engine Scheduler log (0 = disabled (default), 1 = enabled)"); +module_param_named(mes_log_enable, amdgpu_mes_log_enable, int, 0444); + +/** * DOC: mes_kiq (int) * Enable Micro Engine Scheduler KIQ. This is a new engine pipe for kiq. * (0 = disabled (default), 1 = enabled) @@ -849,12 +860,13 @@ module_param_named(visualconfirm, amdgpu_dc_visual_confirm, uint, 0444); * the ABM algorithm, with 1 being the least reduction and 4 being the most * reduction. * - * Defaults to 0, or disabled. Userspace can still override this level later - * after boot. + * Defaults to -1, or disabled. Userspace can only override this level after + * boot if it's set to auto. */ -uint amdgpu_dm_abm_level; -MODULE_PARM_DESC(abmlevel, "ABM level (0 = off (default), 1-4 = backlight reduction level) "); -module_param_named(abmlevel, amdgpu_dm_abm_level, uint, 0444); +int amdgpu_dm_abm_level = -1; +MODULE_PARM_DESC(abmlevel, + "ABM level (0 = off, 1-4 = backlight reduction level, -1 auto (default))"); +module_param_named(abmlevel, amdgpu_dm_abm_level, int, 0444); int amdgpu_backlight = -1; MODULE_PARM_DESC(backlight, "Backlight control (0 = pwm, 1 = aux, -1 auto (default))"); @@ -883,6 +895,32 @@ MODULE_PARM_DESC(tmz, "Enable TMZ feature (-1 = auto (default), 0 = off, 1 = on) module_param_named(tmz, amdgpu_tmz, int, 0444); /** + * DOC: freesync_video (uint) + * Enable the optimization to adjust front porch timing to achieve seamless + * mode change experience when setting a freesync supported mode for which full + * modeset is not needed. + * + * The Display Core will add a set of modes derived from the base FreeSync + * video mode into the corresponding connector's mode list based on commonly + * used refresh rates and VRR range of the connected display, when users enable + * this feature. From the userspace perspective, they can see a seamless mode + * change experience when the change between different refresh rates under the + * same resolution. Additionally, userspace applications such as Video playback + * can read this modeset list and change the refresh rate based on the video + * frame rate. Finally, the userspace can also derive an appropriate mode for a + * particular refresh rate based on the FreeSync Mode and add it to the + * connector's mode list. + * + * Note: This is an experimental feature. + * + * The default value: 0 (off). + */ +MODULE_PARM_DESC( + freesync_video, + "Enable freesync modesetting optimization feature (0 = off (default), 1 = on)"); +module_param_named(freesync_video, amdgpu_freesync_vid_mode, uint, 0444); + +/** * DOC: reset_method (int) * GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco) */ @@ -2668,7 +2706,7 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev) } adev->in_runpm = true; - if (amdgpu_device_supports_px(drm_dev)) + if (adev->pm.rpm_mode == AMDGPU_RUNPM_PX) drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; /* @@ -2678,7 +2716,7 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev) * platforms. * TODO: this may be also needed for PX capable platform. */ - if (amdgpu_device_supports_boco(drm_dev)) + if (adev->pm.rpm_mode == AMDGPU_RUNPM_BOCO) adev->mp1_state = PP_MP1_STATE_UNLOAD; ret = amdgpu_device_prepare(drm_dev); @@ -2687,15 +2725,15 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev) ret = amdgpu_device_suspend(drm_dev, false); if (ret) { adev->in_runpm = false; - if (amdgpu_device_supports_boco(drm_dev)) + if (adev->pm.rpm_mode == AMDGPU_RUNPM_BOCO) adev->mp1_state = PP_MP1_STATE_NONE; return ret; } - if (amdgpu_device_supports_boco(drm_dev)) + if (adev->pm.rpm_mode == AMDGPU_RUNPM_BOCO) adev->mp1_state = PP_MP1_STATE_NONE; - if (amdgpu_device_supports_px(drm_dev)) { + if (adev->pm.rpm_mode == AMDGPU_RUNPM_PX) { /* Only need to handle PCI state in the driver for ATPX * PCI core handles it for _PR3. */ @@ -2704,9 +2742,9 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev) pci_ignore_hotplug(pdev); pci_set_power_state(pdev, PCI_D3cold); drm_dev->switch_power_state = DRM_SWITCH_POWER_DYNAMIC_OFF; - } else if (amdgpu_device_supports_boco(drm_dev)) { + } else if (adev->pm.rpm_mode == AMDGPU_RUNPM_BOCO) { /* nothing to do */ - } else if (amdgpu_device_supports_baco(drm_dev)) { + } else if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) { amdgpu_device_baco_enter(drm_dev); } @@ -2729,7 +2767,7 @@ static int amdgpu_pmops_runtime_resume(struct device *dev) if (!pci_device_is_present(adev->pdev)) adev->no_hw_access = true; - if (amdgpu_device_supports_px(drm_dev)) { + if (adev->pm.rpm_mode == AMDGPU_RUNPM_PX) { drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; /* Only need to handle PCI state in the driver for ATPX @@ -2741,22 +2779,22 @@ static int amdgpu_pmops_runtime_resume(struct device *dev) if (ret) return ret; pci_set_master(pdev); - } else if (amdgpu_device_supports_boco(drm_dev)) { + } else if (adev->pm.rpm_mode == AMDGPU_RUNPM_BOCO) { /* Only need to handle PCI state in the driver for ATPX * PCI core handles it for _PR3. */ pci_set_master(pdev); - } else if (amdgpu_device_supports_baco(drm_dev)) { + } else if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) { amdgpu_device_baco_exit(drm_dev); } ret = amdgpu_device_resume(drm_dev, false); if (ret) { - if (amdgpu_device_supports_px(drm_dev)) + if (adev->pm.rpm_mode == AMDGPU_RUNPM_PX) pci_disable_device(pdev); return ret; } - if (amdgpu_device_supports_px(drm_dev)) + if (adev->pm.rpm_mode == AMDGPU_RUNPM_PX) drm_dev->switch_power_state = DRM_SWITCH_POWER_ON; adev->in_runpm = false; return 0; @@ -2766,8 +2804,7 @@ static int amdgpu_pmops_runtime_idle(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(drm_dev); - /* we don't want the main rpm_idle to call suspend - we want to autosuspend */ - int ret = 1; + int ret; if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE) { pm_runtime_forbid(dev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 70bff8cecf..10832b4704 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -61,9 +61,7 @@ static struct kmem_cache *amdgpu_fence_slab; int amdgpu_fence_slab_init(void) { - amdgpu_fence_slab = kmem_cache_create( - "amdgpu_fence", sizeof(struct amdgpu_fence), 0, - SLAB_HWCACHE_ALIGN, NULL); + amdgpu_fence_slab = KMEM_CACHE(amdgpu_fence, SLAB_HWCACHE_ALIGN); if (!amdgpu_fence_slab) return -ENOMEM; return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 49a5f1c73b..67c234bcf8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -187,7 +187,40 @@ static int amdgpu_gem_object_open(struct drm_gem_object *obj, else ++bo_va->ref_count; amdgpu_bo_unreserve(abo); - return 0; + + /* Validate and add eviction fence to DMABuf imports with dynamic + * attachment in compute VMs. Re-validation will be done by + * amdgpu_vm_validate. Fences are on the reservation shared with the + * export, which is currently required to be validated and fenced + * already by amdgpu_amdkfd_gpuvm_restore_process_bos. + * + * Nested locking below for the case that a GEM object is opened in + * kfd_mem_export_dmabuf. Since the lock below is only taken for imports, + * but not for export, this is a different lock class that cannot lead to + * circular lock dependencies. + */ + if (!vm->is_compute_context || !vm->process_info) + return 0; + if (!obj->import_attach || + !dma_buf_is_dynamic(obj->import_attach->dmabuf)) + return 0; + mutex_lock_nested(&vm->process_info->lock, 1); + if (!WARN_ON(!vm->process_info->eviction_fence)) { + r = amdgpu_amdkfd_bo_validate_and_fence(abo, AMDGPU_GEM_DOMAIN_GTT, + &vm->process_info->eviction_fence->base); + if (r) { + struct amdgpu_task_info *ti = amdgpu_vm_get_task_info_vm(vm); + + dev_warn(adev->dev, "validate_and_fence failed: %d\n", r); + if (ti) { + dev_warn(adev->dev, "pid %d\n", ti->pid); + amdgpu_vm_put_task_info(ti); + } + } + } + mutex_unlock(&vm->process_info->lock); + + return r; } static void amdgpu_gem_object_close(struct drm_gem_object *obj, @@ -682,10 +715,10 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, uint64_t vm_size; int r = 0; - if (args->va_address < AMDGPU_VA_RESERVED_SIZE) { + if (args->va_address < AMDGPU_VA_RESERVED_BOTTOM) { dev_dbg(dev->dev, "va_address 0x%llx is in reserved area 0x%llx\n", - args->va_address, AMDGPU_VA_RESERVED_SIZE); + args->va_address, AMDGPU_VA_RESERVED_BOTTOM); return -EINVAL; } @@ -701,7 +734,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, args->va_address &= AMDGPU_GMC_HOLE_MASK; vm_size = adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE; - vm_size -= AMDGPU_VA_RESERVED_SIZE; + vm_size -= AMDGPU_VA_RESERVED_TOP; if (args->va_address + args->map_size > vm_size) { dev_dbg(dev->dev, "va_address 0x%llx is in top reserved area 0x%llx\n", diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 6ddc8e3360..55d5508987 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -304,11 +304,11 @@ static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev, return -EINVAL; } -int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, - struct amdgpu_ring *ring, - struct amdgpu_irq_src *irq, int xcc_id) +int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, int xcc_id) { struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id]; + struct amdgpu_irq_src *irq = &kiq->irq; + struct amdgpu_ring *ring = &kiq->ring; int r = 0; spin_lock_init(&kiq->ring_lock); @@ -329,7 +329,8 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, ring->eop_gpu_addr = kiq->eop_gpu_addr; ring->no_scheduler = true; - sprintf(ring->name, "kiq_%d.%d.%d.%d", xcc_id, ring->me, ring->pipe, ring->queue); + snprintf(ring->name, sizeof(ring->name), "kiq_%d.%d.%d.%d", + xcc_id, ring->me, ring->pipe, ring->queue); r = amdgpu_ring_init(adev, ring, 1024, irq, AMDGPU_CP_KIQ_IRQ_DRIVER0, AMDGPU_RING_PRIO_DEFAULT, NULL); if (r) @@ -642,8 +643,8 @@ int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev, int xcc_id) kiq->pmf->kiq_set_resources(kiq_ring, queue_mask); for (i = 0; i < adev->gfx.num_compute_rings; i++) { j = i + xcc_id * adev->gfx.num_compute_rings; - kiq->pmf->kiq_map_queues(kiq_ring, - &adev->gfx.compute_ring[j]); + kiq->pmf->kiq_map_queues(kiq_ring, + &adev->gfx.compute_ring[j]); } r = amdgpu_ring_test_helper(kiq_ring); @@ -686,7 +687,7 @@ int amdgpu_gfx_enable_kgq(struct amdgpu_device *adev, int xcc_id) r = amdgpu_ring_test_helper(kiq_ring); spin_unlock(&kiq->ring_lock); if (r) - DRM_ERROR("KCQ enable failed\n"); + DRM_ERROR("KGQ enable failed\n"); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index f23bafec71..8fcf889ddc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -471,9 +471,7 @@ static inline u32 amdgpu_gfx_create_bitmask(u32 bit_width) void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned max_sh); -int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, - struct amdgpu_ring *ring, - struct amdgpu_irq_src *irq, int xcc_id); +int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, int xcc_id); void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 55784a9f26..08b9dfb653 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -52,7 +52,7 @@ int amdgpu_gmc_pdb0_alloc(struct amdgpu_device *adev) struct amdgpu_bo_param bp; u64 vram_size = adev->gmc.xgmi.node_segment_size * adev->gmc.xgmi.num_physical_nodes; uint32_t pde0_page_shift = adev->gmc.vmid0_page_table_block_size + 21; - uint32_t npdes = (vram_size + (1ULL << pde0_page_shift) -1) >> pde0_page_shift; + uint32_t npdes = (vram_size + (1ULL << pde0_page_shift) - 1) >> pde0_page_shift; memset(&bp, 0, sizeof(bp)); bp.size = PAGE_ALIGN((npdes + 1) * 8); @@ -684,12 +684,17 @@ int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid, struct amdgpu_ring *ring = &adev->gfx.kiq[inst].ring; struct amdgpu_kiq *kiq = &adev->gfx.kiq[inst]; unsigned int ndw; - signed long r; + int r; uint32_t seq; - if (!adev->gmc.flush_pasid_uses_kiq || !ring->sched.ready || - !down_read_trylock(&adev->reset_domain->sem)) { + /* + * A GPU reset should flush all TLBs anyway, so no need to do + * this while one is ongoing. + */ + if (!down_read_trylock(&adev->reset_domain->sem)) + return 0; + if (!adev->gmc.flush_pasid_uses_kiq || !ring->sched.ready) { if (adev->gmc.flush_tlb_needs_extra_type_2) adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid, 2, all_hub, @@ -703,49 +708,99 @@ int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid, adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub, inst); - return 0; - } + r = 0; + } else { + /* 2 dwords flush + 8 dwords fence */ + ndw = kiq->pmf->invalidate_tlbs_size + 8; - /* 2 dwords flush + 8 dwords fence */ - ndw = kiq->pmf->invalidate_tlbs_size + 8; + if (adev->gmc.flush_tlb_needs_extra_type_2) + ndw += kiq->pmf->invalidate_tlbs_size; - if (adev->gmc.flush_tlb_needs_extra_type_2) - ndw += kiq->pmf->invalidate_tlbs_size; + if (adev->gmc.flush_tlb_needs_extra_type_0) + ndw += kiq->pmf->invalidate_tlbs_size; - if (adev->gmc.flush_tlb_needs_extra_type_0) - ndw += kiq->pmf->invalidate_tlbs_size; + spin_lock(&adev->gfx.kiq[inst].ring_lock); + amdgpu_ring_alloc(ring, ndw); + if (adev->gmc.flush_tlb_needs_extra_type_2) + kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 2, all_hub); - spin_lock(&adev->gfx.kiq[inst].ring_lock); - amdgpu_ring_alloc(ring, ndw); - if (adev->gmc.flush_tlb_needs_extra_type_2) - kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 2, all_hub); + if (flush_type == 2 && adev->gmc.flush_tlb_needs_extra_type_0) + kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 0, all_hub); - if (flush_type == 2 && adev->gmc.flush_tlb_needs_extra_type_0) - kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 0, all_hub); + kiq->pmf->kiq_invalidate_tlbs(ring, pasid, flush_type, all_hub); + r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); + if (r) { + amdgpu_ring_undo(ring); + spin_unlock(&adev->gfx.kiq[inst].ring_lock); + goto error_unlock_reset; + } - kiq->pmf->kiq_invalidate_tlbs(ring, pasid, flush_type, all_hub); - r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); - if (r) { - amdgpu_ring_undo(ring); + amdgpu_ring_commit(ring); spin_unlock(&adev->gfx.kiq[inst].ring_lock); - goto error_unlock_reset; - } - - amdgpu_ring_commit(ring); - spin_unlock(&adev->gfx.kiq[inst].ring_lock); - r = amdgpu_fence_wait_polling(ring, seq, usec_timeout); - if (r < 1) { - dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r); - r = -ETIME; - goto error_unlock_reset; + if (amdgpu_fence_wait_polling(ring, seq, usec_timeout) < 1) { + dev_err(adev->dev, "timeout waiting for kiq fence\n"); + r = -ETIME; + } } - r = 0; error_unlock_reset: up_read(&adev->reset_domain->sem); return r; } +void amdgpu_gmc_fw_reg_write_reg_wait(struct amdgpu_device *adev, + uint32_t reg0, uint32_t reg1, + uint32_t ref, uint32_t mask, + uint32_t xcc_inst) +{ + struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_inst]; + struct amdgpu_ring *ring = &kiq->ring; + signed long r, cnt = 0; + unsigned long flags; + uint32_t seq; + + if (adev->mes.ring.sched.ready) { + amdgpu_mes_reg_write_reg_wait(adev, reg0, reg1, + ref, mask); + return; + } + + spin_lock_irqsave(&kiq->ring_lock, flags); + amdgpu_ring_alloc(ring, 32); + amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1, + ref, mask); + r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); + if (r) + goto failed_undo; + + amdgpu_ring_commit(ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); + + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); + + /* don't wait anymore for IRQ context */ + if (r < 1 && in_interrupt()) + goto failed_kiq; + + might_sleep(); + while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { + + msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); + } + + if (cnt > MAX_KIQ_REG_TRY) + goto failed_kiq; + + return; + +failed_undo: + amdgpu_ring_undo(ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); +failed_kiq: + dev_err(adev->dev, "failed to write reg %x wait reg %x\n", reg0, reg1); +} + /** * amdgpu_gmc_tmz_set -- check and set if a device supports TMZ * @adev: amdgpu_device pointer @@ -790,6 +845,7 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev) case IP_VERSION(10, 3, 3): case IP_VERSION(11, 0, 4): case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 1): /* Don't enable it by default yet. */ if (amdgpu_tmz < 1) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index e699d1ca8d..17f40ea110 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -417,6 +417,10 @@ void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid, uint32_t flush_type, bool all_hub, uint32_t inst); +void amdgpu_gmc_fw_reg_write_reg_wait(struct amdgpu_device *adev, + uint32_t reg0, uint32_t reg1, + uint32_t ref, uint32_t mask, + uint32_t xcc_inst); extern void amdgpu_gmc_tmz_set(struct amdgpu_device *adev); extern void amdgpu_gmc_noretry_set(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 6aa3b1d845..8b512dc28d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -131,7 +131,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, struct amdgpu_ib *ib = &ibs[0]; struct dma_fence *tmp = NULL; bool need_ctx_switch; - unsigned int patch_offset = ~0; struct amdgpu_vm *vm; uint64_t fence_ctx; uint32_t status = 0, alloc_size; @@ -139,10 +138,11 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, bool secure, init_shadow; u64 shadow_va, csa_va, gds_va; int vmid = AMDGPU_JOB_GET_VMID(job); + bool need_pipe_sync = false; + unsigned int cond_exec; unsigned int i; int r = 0; - bool need_pipe_sync = false; if (num_ibs == 0) return -EINVAL; @@ -228,7 +228,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, init_shadow, vmid); if (ring->funcs->init_cond_exec) - patch_offset = amdgpu_ring_init_cond_exec(ring); + cond_exec = amdgpu_ring_init_cond_exec(ring, + ring->cond_exe_gpu_addr); amdgpu_device_flush_hdp(adev, ring); @@ -278,16 +279,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, fence_flags | AMDGPU_FENCE_FLAG_64BIT); } - if (ring->funcs->emit_gfx_shadow) { + if (ring->funcs->emit_gfx_shadow && ring->funcs->init_cond_exec) { amdgpu_ring_emit_gfx_shadow(ring, 0, 0, 0, false, 0); - - if (ring->funcs->init_cond_exec) { - unsigned int ce_offset = ~0; - - ce_offset = amdgpu_ring_init_cond_exec(ring); - if (ce_offset != ~0 && ring->funcs->patch_cond_exec) - amdgpu_ring_patch_cond_exec(ring, ce_offset); - } + amdgpu_ring_init_cond_exec(ring, ring->cond_exe_gpu_addr); } r = amdgpu_fence_emit(ring, f, job, fence_flags); @@ -302,8 +296,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, if (ring->funcs->insert_end) ring->funcs->insert_end(ring); - if (patch_offset != ~0 && ring->funcs->patch_cond_exec) - amdgpu_ring_patch_cond_exec(ring, patch_offset); + amdgpu_ring_patch_cond_exec(ring, cond_exec); ring->current_ctx = fence_ctx; if (vm && ring->funcs->emit_switch_buffer) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index ddd0891da1..3d7fcdeaf8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -62,9 +62,8 @@ int amdgpu_pasid_alloc(unsigned int bits) int pasid = -EINVAL; for (bits = min(bits, 31U); bits > 0; bits--) { - pasid = ida_simple_get(&amdgpu_pasid_ida, - 1U << (bits - 1), 1U << bits, - GFP_KERNEL); + pasid = ida_alloc_range(&amdgpu_pasid_ida, 1U << (bits - 1), + (1U << bits) - 1, GFP_KERNEL); if (pasid != -ENOSPC) break; } @@ -82,7 +81,7 @@ int amdgpu_pasid_alloc(unsigned int bits) void amdgpu_pasid_free(u32 pasid) { trace_amdgpu_pasid_freed(pasid); - ida_simple_remove(&amdgpu_pasid_ida, pasid); + ida_free(&amdgpu_pasid_ida, pasid); } static void amdgpu_pasid_free_cb(struct dma_fence *fence, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 0b8c6581b6..e4742b6503 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -35,7 +35,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) { struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched); struct amdgpu_job *job = to_amdgpu_job(s_job); - struct amdgpu_task_info ti; + struct amdgpu_task_info *ti; struct amdgpu_device *adev = ring->adev; int idx; int r; @@ -48,7 +48,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) return DRM_GPU_SCHED_STAT_ENODEV; } - memset(&ti, 0, sizeof(struct amdgpu_task_info)); + adev->job_hang = true; if (amdgpu_gpu_recovery && @@ -58,12 +58,16 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) goto exit; } - amdgpu_vm_get_task_info(ring->adev, job->pasid, &ti); DRM_ERROR("ring %s timeout, signaled seq=%u, emitted seq=%u\n", - job->base.sched->name, atomic_read(&ring->fence_drv.last_seq), - ring->fence_drv.sync_seq); - DRM_ERROR("Process information: process %s pid %d thread %s pid %d\n", - ti.process_name, ti.tgid, ti.task_name, ti.pid); + job->base.sched->name, atomic_read(&ring->fence_drv.last_seq), + ring->fence_drv.sync_seq); + + ti = amdgpu_vm_get_task_info_pasid(ring->adev, job->pasid); + if (ti) { + DRM_ERROR("Process information: process %s pid %d thread %s pid %d\n", + ti->process_name, ti->tgid, ti->task_name, ti->pid); + amdgpu_vm_put_task_info(ti); + } dma_fence_set_error(&s_job->s_fence->finished, -ETIME); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c index 2ff2897fd1..6df99cb00d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c @@ -36,10 +36,35 @@ static void amdgpu_jpeg_idle_work_handler(struct work_struct *work); int amdgpu_jpeg_sw_init(struct amdgpu_device *adev) { + int i, r; + INIT_DELAYED_WORK(&adev->jpeg.idle_work, amdgpu_jpeg_idle_work_handler); mutex_init(&adev->jpeg.jpeg_pg_lock); atomic_set(&adev->jpeg.total_submission_cnt, 0); + if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) && + (adev->pg_flags & AMD_PG_SUPPORT_JPEG_DPG)) + adev->jpeg.indirect_sram = true; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + + if (adev->jpeg.indirect_sram) { + r = amdgpu_bo_create_kernel(adev, 64 * 2 * 4, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM | + AMDGPU_GEM_DOMAIN_GTT, + &adev->jpeg.inst[i].dpg_sram_bo, + &adev->jpeg.inst[i].dpg_sram_gpu_addr, + &adev->jpeg.inst[i].dpg_sram_cpu_addr); + if (r) { + dev_err(adev->dev, + "JPEG %d (%d) failed to allocate DPG bo\n", i, r); + return r; + } + } + } + return 0; } @@ -51,6 +76,11 @@ int amdgpu_jpeg_sw_fini(struct amdgpu_device *adev) if (adev->jpeg.harvest_config & (1 << i)) continue; + amdgpu_bo_free_kernel( + &adev->jpeg.inst[i].dpg_sram_bo, + &adev->jpeg.inst[i].dpg_sram_gpu_addr, + (void **)&adev->jpeg.inst[i].dpg_sram_cpu_addr); + for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) amdgpu_ring_fini(&adev->jpeg.inst[i].ring_dec[j]); } @@ -210,12 +240,15 @@ int amdgpu_jpeg_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout) } else { r = 0; } + if (!amdgpu_sriov_vf(adev)) { for (i = 0; i < adev->usec_timeout; i++) { tmp = RREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch[ring->pipe]); if (tmp == 0xDEADBEEF) break; udelay(1); + if (amdgpu_emu_mode == 1) + udelay(10); } if (i >= adev->usec_timeout) @@ -296,3 +329,16 @@ int amdgpu_jpeg_ras_sw_init(struct amdgpu_device *adev) return 0; } + +int amdgpu_jpeg_psp_update_sram(struct amdgpu_device *adev, int inst_idx, + enum AMDGPU_UCODE_ID ucode_id) +{ + struct amdgpu_firmware_info ucode = { + .ucode_id = AMDGPU_UCODE_ID_JPEG_RAM, + .mc_addr = adev->jpeg.inst[inst_idx].dpg_sram_gpu_addr, + .ucode_size = ((uintptr_t)adev->jpeg.inst[inst_idx].dpg_sram_curr_addr - + (uintptr_t)adev->jpeg.inst[inst_idx].dpg_sram_cpu_addr), + }; + + return psp_execute_ip_fw_load(&adev->psp, &ucode); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h index ffe47e9f5b..aea31d61d9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h @@ -32,6 +32,34 @@ #define AMDGPU_JPEG_HARVEST_JPEG0 (1 << 0) #define AMDGPU_JPEG_HARVEST_JPEG1 (1 << 1) +#define WREG32_SOC15_JPEG_DPG_MODE(inst_idx, offset, value, indirect) \ + do { \ + if (!indirect) { \ + WREG32_SOC15(JPEG, GET_INST(JPEG, inst_idx), \ + mmUVD_DPG_LMA_DATA, value); \ + WREG32_SOC15( \ + JPEG, GET_INST(JPEG, inst_idx), \ + mmUVD_DPG_LMA_CTL, \ + (UVD_DPG_LMA_CTL__READ_WRITE_MASK | \ + offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT | \ + indirect << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \ + } else { \ + *adev->jpeg.inst[inst_idx].dpg_sram_curr_addr++ = \ + offset; \ + *adev->jpeg.inst[inst_idx].dpg_sram_curr_addr++ = \ + value; \ + } \ + } while (0) + +#define RREG32_SOC15_JPEG_DPG_MODE(inst_idx, offset, mask_en) \ + ({ \ + WREG32_SOC15(JPEG, inst_idx, mmUVD_DPG_LMA_CTL, \ + (0x0 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \ + mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \ + offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \ + RREG32_SOC15(JPEG, inst_idx, mmUVD_DPG_LMA_DATA); \ + }) + struct amdgpu_jpeg_reg{ unsigned jpeg_pitch[AMDGPU_MAX_JPEG_RINGS]; }; @@ -41,6 +69,11 @@ struct amdgpu_jpeg_inst { struct amdgpu_irq_src irq; struct amdgpu_irq_src ras_poison_irq; struct amdgpu_jpeg_reg external; + struct amdgpu_bo *dpg_sram_bo; + struct dpg_pause_state pause_state; + void *dpg_sram_cpu_addr; + uint64_t dpg_sram_gpu_addr; + uint32_t *dpg_sram_curr_addr; uint8_t aid_id; }; @@ -63,6 +96,7 @@ struct amdgpu_jpeg { uint16_t inst_mask; uint8_t num_inst_per_aid; + bool indirect_sram; }; int amdgpu_jpeg_sw_init(struct amdgpu_device *adev); @@ -82,5 +116,7 @@ int amdgpu_jpeg_process_poison_irq(struct amdgpu_device *adev, int amdgpu_jpeg_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block); int amdgpu_jpeg_ras_sw_init(struct amdgpu_device *adev); +int amdgpu_jpeg_psp_update_sram(struct amdgpu_device *adev, int inst_idx, + enum AMDGPU_UCODE_ID ucode_id); #endif /*__AMDGPU_JPEG_H__*/ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index bf4f48fe43..a2df3025a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -894,14 +894,14 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) dev_info->ids_flags |= AMDGPU_IDS_FLAGS_CONFORMANT_TRUNC_COORD; vm_size = adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE; - vm_size -= AMDGPU_VA_RESERVED_SIZE; + vm_size -= AMDGPU_VA_RESERVED_TOP; /* Older VCE FW versions are buggy and can handle only 40bits */ if (adev->vce.fw_version && adev->vce.fw_version < AMDGPU_VCE_FW_53_45) vm_size = min(vm_size, 1ULL << 40); - dev_info->virtual_address_offset = AMDGPU_VA_RESERVED_SIZE; + dev_info->virtual_address_offset = AMDGPU_VA_RESERVED_BOTTOM; dev_info->virtual_address_max = min(vm_size, AMDGPU_GMC_HOLE_START); @@ -1114,6 +1114,15 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) } ui32 >>= 8; break; + case AMDGPU_INFO_SENSOR_GPU_INPUT_POWER: + /* get input GPU power */ + if (amdgpu_dpm_read_sensor(adev, + AMDGPU_PP_SENSOR_GPU_INPUT_POWER, + (void *)&ui32, &ui32_size)) { + return -EINVAL; + } + ui32 >>= 8; + break; case AMDGPU_INFO_SENSOR_VDDNB: /* get VDDNB in millivolts */ if (amdgpu_dpm_read_sensor(adev, @@ -1370,6 +1379,10 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) goto error_vm; } + r = amdgpu_seq64_map(adev, &fpriv->vm, &fpriv->seq64_va); + if (r) + goto error_vm; + mutex_init(&fpriv->bo_list_lock); idr_init_base(&fpriv->bo_list_handles, 1); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c index 59fafb8392..24ad4b9717 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c @@ -27,6 +27,16 @@ #include "umc/umc_6_7_0_offset.h" #include "umc/umc_6_7_0_sh_mask.h" +static bool amdgpu_mca_is_deferred_error(struct amdgpu_device *adev, + uint64_t mc_status) +{ + if (adev->umc.ras->check_ecc_err_status) + return adev->umc.ras->check_ecc_err_status(adev, + AMDGPU_MCA_ERROR_TYPE_DE, &mc_status); + + return false; +} + void amdgpu_mca_query_correctable_error_count(struct amdgpu_device *adev, uint64_t mc_status_addr, unsigned long *error_count) @@ -202,16 +212,16 @@ int amdgpu_mca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable) static void amdgpu_mca_smu_mca_bank_dump(struct amdgpu_device *adev, int idx, struct mca_bank_entry *entry) { - dev_info(adev->dev, "[Hardware error] Accelerator Check Architecture events logged\n"); - dev_info(adev->dev, "[Hardware error] aca entry[%02d].STATUS=0x%016llx\n", + dev_info(adev->dev, HW_ERR "Accelerator Check Architecture events logged\n"); + dev_info(adev->dev, HW_ERR "aca entry[%02d].STATUS=0x%016llx\n", idx, entry->regs[MCA_REG_IDX_STATUS]); - dev_info(adev->dev, "[Hardware error] aca entry[%02d].ADDR=0x%016llx\n", + dev_info(adev->dev, HW_ERR "aca entry[%02d].ADDR=0x%016llx\n", idx, entry->regs[MCA_REG_IDX_ADDR]); - dev_info(adev->dev, "[Hardware error] aca entry[%02d].MISC0=0x%016llx\n", + dev_info(adev->dev, HW_ERR "aca entry[%02d].MISC0=0x%016llx\n", idx, entry->regs[MCA_REG_IDX_MISC0]); - dev_info(adev->dev, "[Hardware error] aca entry[%02d].IPID=0x%016llx\n", + dev_info(adev->dev, HW_ERR "aca entry[%02d].IPID=0x%016llx\n", idx, entry->regs[MCA_REG_IDX_IPID]); - dev_info(adev->dev, "[Hardware error] aca entry[%02d].SYND=0x%016llx\n", + dev_info(adev->dev, HW_ERR "aca entry[%02d].SYND=0x%016llx\n", idx, entry->regs[MCA_REG_IDX_SYND]); } @@ -256,9 +266,14 @@ int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_blo if (type == AMDGPU_MCA_ERROR_TYPE_UE) amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, &err_addr, (uint64_t)count); - else - amdgpu_ras_error_statistic_ce_count(err_data, - &mcm_info, &err_addr, (uint64_t)count); + else { + if (amdgpu_mca_is_deferred_error(adev, entry->regs[MCA_REG_IDX_STATUS])) + amdgpu_ras_error_statistic_de_count(err_data, + &mcm_info, &err_addr, (uint64_t)count); + else + amdgpu_ras_error_statistic_ce_count(err_data, + &mcm_info, &err_addr, (uint64_t)count); + } } out_mca_release: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h index b399f1b628..b964110ed1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h @@ -65,6 +65,7 @@ enum amdgpu_mca_ip { enum amdgpu_mca_error_type { AMDGPU_MCA_ERROR_TYPE_UE = 0, AMDGPU_MCA_ERROR_TYPE_CE, + AMDGPU_MCA_ERROR_TYPE_DE, }; struct amdgpu_mca_ras_block { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 420e5bc44e..1569bef030 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -102,7 +102,10 @@ static int amdgpu_mes_event_log_init(struct amdgpu_device *adev) { int r; - r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE, + if (!amdgpu_mes_log_enable) + return 0; + + r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_LOG_BUFFER_SIZE, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, &adev->mes.event_log_gpu_obj, &adev->mes.event_log_gpu_addr, @@ -1399,7 +1402,7 @@ int amdgpu_mes_self_test(struct amdgpu_device *adev) goto error_fini; } - ctx_data.meta_data_gpu_addr = AMDGPU_VA_RESERVED_SIZE; + ctx_data.meta_data_gpu_addr = AMDGPU_VA_RESERVED_BOTTOM; r = amdgpu_mes_ctx_map_meta_data(adev, vm, &ctx_data); if (r) { DRM_ERROR("failed to map ctx meta data\n"); @@ -1550,12 +1553,11 @@ static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, void *unused) uint32_t *mem = (uint32_t *)(adev->mes.event_log_cpu_addr); seq_hex_dump(m, "", DUMP_PREFIX_OFFSET, 32, 4, - mem, PAGE_SIZE, false); + mem, AMDGPU_MES_LOG_BUFFER_SIZE, false); return 0; } - DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_mes_event_log); #endif @@ -1566,9 +1568,9 @@ void amdgpu_debugfs_mes_event_log_init(struct amdgpu_device *adev) #if defined(CONFIG_DEBUG_FS) struct drm_minor *minor = adev_to_drm(adev)->primary; struct dentry *root = minor->debugfs_root; - - debugfs_create_file("amdgpu_mes_event_log", 0444, root, - adev, &amdgpu_debugfs_mes_event_log_fops); + if (adev->enable_mes && amdgpu_mes_log_enable) + debugfs_create_file("amdgpu_mes_event_log", 0444, root, + adev, &amdgpu_debugfs_mes_event_log_fops); #endif } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index 7d4f93fea9..4c8fc3117e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -52,6 +52,7 @@ enum amdgpu_mes_priority_level { #define AMDGPU_MES_PROC_CTX_SIZE 0x1000 /* one page area */ #define AMDGPU_MES_GANG_CTX_SIZE 0x1000 /* one page area */ +#define AMDGPU_MES_LOG_BUFFER_SIZE 0x4000 /* Maximu log buffer size for MES */ struct amdgpu_mes_funcs; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index 2e4911050c..1fe21a70dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -324,8 +324,6 @@ struct amdgpu_mode_info { struct drm_property *audio_property; /* FMT dithering */ struct drm_property *dither_property; - /* Adaptive Backlight Modulation (power feature) */ - struct drm_property *abm_level_property; /* hardcoded DFP edid from BIOS */ struct edid *bios_hardcoded_edid; int bios_hardcoded_edid_size; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c index 51ca544a70..d085687a47 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c @@ -53,14 +53,6 @@ u64 amdgpu_nbio_get_pcie_replay_count(struct amdgpu_device *adev) return 0; } -void amdgpu_nbio_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0, - uint64_t *count1) -{ - if (adev->nbio.funcs->get_pcie_usage) - adev->nbio.funcs->get_pcie_usage(adev, count0, count1); - -} - int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) { int r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h index 65e35059de..7b8c03be1d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h @@ -102,8 +102,6 @@ struct amdgpu_nbio_funcs { u32 (*get_memory_partition_mode)(struct amdgpu_device *adev, u32 *supp_modes); u64 (*get_pcie_replay_count)(struct amdgpu_device *adev); - void (*get_pcie_usage)(struct amdgpu_device *adev, uint64_t *count0, - uint64_t *count1); }; struct amdgpu_nbio { @@ -116,7 +114,6 @@ struct amdgpu_nbio { }; int amdgpu_nbio_ras_sw_init(struct amdgpu_device *adev); -void amdgpu_nbio_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0, uint64_t *count1); int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block); u64 amdgpu_nbio_get_pcie_replay_count(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 3af505e8cb..f6d503432a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -220,9 +220,6 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain) placement->num_placement = c; placement->placement = places; - - placement->num_busy_placement = c; - placement->busy_placement = places; } /** @@ -1412,8 +1409,7 @@ vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo) AMDGPU_GEM_DOMAIN_GTT); /* Avoid costly evictions; only set GTT as a busy placement */ - abo->placement.num_busy_placement = 1; - abo->placement.busy_placement = &abo->placements[1]; + abo->placements[0].flags |= TTM_PL_FLAG_DESIRED; r = ttm_bo_validate(bo, &abo->placement, &ctx); if (unlikely(r == -EBUSY || r == -ERESTARTSYS)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 0328616473..94b310fdb7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -38,6 +38,7 @@ #include "psp_v12_0.h" #include "psp_v13_0.h" #include "psp_v13_0_4.h" +#include "psp_v14_0.h" #include "amdgpu_ras.h" #include "amdgpu_securedisplay.h" @@ -162,20 +163,26 @@ static int psp_early_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct psp_context *psp = &adev->psp; + psp->autoload_supported = true; + psp->boot_time_tmr = true; + switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) { case IP_VERSION(9, 0, 0): psp_v3_1_set_psp_funcs(psp); psp->autoload_supported = false; + psp->boot_time_tmr = false; break; case IP_VERSION(10, 0, 0): case IP_VERSION(10, 0, 1): psp_v10_0_set_psp_funcs(psp); psp->autoload_supported = false; + psp->boot_time_tmr = false; break; case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 4): psp_v11_0_set_psp_funcs(psp); psp->autoload_supported = false; + psp->boot_time_tmr = false; break; case IP_VERSION(11, 0, 0): case IP_VERSION(11, 0, 7): @@ -188,15 +195,20 @@ static int psp_early_init(void *handle) case IP_VERSION(11, 0, 12): case IP_VERSION(11, 0, 13): psp_v11_0_set_psp_funcs(psp); - psp->autoload_supported = true; + psp->boot_time_tmr = false; break; case IP_VERSION(11, 0, 3): case IP_VERSION(12, 0, 1): psp_v12_0_set_psp_funcs(psp); + psp->autoload_supported = false; + psp->boot_time_tmr = false; break; case IP_VERSION(13, 0, 2): + psp->boot_time_tmr = false; + fallthrough; case IP_VERSION(13, 0, 6): psp_v13_0_set_psp_funcs(psp); + psp->autoload_supported = false; break; case IP_VERSION(13, 0, 1): case IP_VERSION(13, 0, 3): @@ -204,25 +216,31 @@ static int psp_early_init(void *handle) case IP_VERSION(13, 0, 8): case IP_VERSION(13, 0, 11): case IP_VERSION(14, 0, 0): + case IP_VERSION(14, 0, 1): psp_v13_0_set_psp_funcs(psp); - psp->autoload_supported = true; + psp->boot_time_tmr = false; break; case IP_VERSION(11, 0, 8): if (adev->apu_flags & AMD_APU_IS_CYAN_SKILLFISH2) { psp_v11_0_8_set_psp_funcs(psp); - psp->autoload_supported = false; } + psp->autoload_supported = false; + psp->boot_time_tmr = false; break; case IP_VERSION(13, 0, 0): case IP_VERSION(13, 0, 7): case IP_VERSION(13, 0, 10): psp_v13_0_set_psp_funcs(psp); - psp->autoload_supported = true; adev->psp.sup_ifwi_up = !amdgpu_sriov_vf(adev); + psp->boot_time_tmr = false; break; case IP_VERSION(13, 0, 4): psp_v13_0_4_set_psp_funcs(psp); - psp->autoload_supported = true; + psp->boot_time_tmr = false; + break; + case IP_VERSION(14, 0, 2): + case IP_VERSION(14, 0, 3): + psp_v14_0_set_psp_funcs(psp); break; default: return -EINVAL; @@ -230,6 +248,8 @@ static int psp_early_init(void *handle) psp->adev = adev; + adev->psp_timeout = 20000; + psp_check_pmfw_centralized_cstate_management(psp); if (amdgpu_sriov_vf(adev)) @@ -291,21 +311,22 @@ static int psp_memory_training_init(struct psp_context *psp) struct psp_memory_training_context *ctx = &psp->mem_train_ctx; if (ctx->init != PSP_MEM_TRAIN_RESERVE_SUCCESS) { - DRM_DEBUG("memory training is not supported!\n"); + dev_dbg(psp->adev->dev, "memory training is not supported!\n"); return 0; } ctx->sys_cache = kzalloc(ctx->train_data_size, GFP_KERNEL); if (ctx->sys_cache == NULL) { - DRM_ERROR("alloc mem_train_ctx.sys_cache failed!\n"); + dev_err(psp->adev->dev, "alloc mem_train_ctx.sys_cache failed!\n"); ret = -ENOMEM; goto Err_out; } - DRM_DEBUG("train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n", - ctx->train_data_size, - ctx->p2c_train_data_offset, - ctx->c2p_train_data_offset); + dev_dbg(psp->adev->dev, + "train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n", + ctx->train_data_size, + ctx->p2c_train_data_offset, + ctx->c2p_train_data_offset); ctx->init = PSP_MEM_TRAIN_INIT_SUCCESS; return 0; @@ -407,7 +428,7 @@ static int psp_sw_init(void *handle) psp->cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); if (!psp->cmd) { - DRM_ERROR("Failed to allocate memory to command buffer!\n"); + dev_err(adev->dev, "Failed to allocate memory to command buffer!\n"); ret = -ENOMEM; } @@ -454,13 +475,13 @@ static int psp_sw_init(void *handle) if (mem_training_ctx->enable_mem_training) { ret = psp_memory_training_init(psp); if (ret) { - DRM_ERROR("Failed to initialize memory training!\n"); + dev_err(adev->dev, "Failed to initialize memory training!\n"); return ret; } ret = psp_mem_training(psp, PSP_MEM_TRAIN_COLD_BOOT); if (ret) { - DRM_ERROR("Failed to process memory training!\n"); + dev_err(adev->dev, "Failed to process memory training!\n"); return ret; } } @@ -626,7 +647,7 @@ psp_cmd_submit_buf(struct psp_context *psp, { int ret; int index; - int timeout = 20000; + int timeout = psp->adev->psp_timeout; bool ras_intr = false; bool skip_unsupport = false; @@ -675,9 +696,11 @@ psp_cmd_submit_buf(struct psp_context *psp, */ if (!skip_unsupport && (psp->cmd_buf_mem->resp.status || !timeout) && !ras_intr) { if (ucode) - DRM_WARN("failed to load ucode %s(0x%X) ", - amdgpu_ucode_name(ucode->ucode_id), ucode->ucode_id); - DRM_WARN("psp gfx command %s(0x%X) failed and response status is (0x%X)\n", + dev_warn(psp->adev->dev, + "failed to load ucode %s(0x%X) ", + amdgpu_ucode_name(ucode->ucode_id), ucode->ucode_id); + dev_warn(psp->adev->dev, + "psp gfx command %s(0x%X) failed and response status is (0x%X)\n", psp_gfx_cmd_name(psp->cmd_buf_mem->cmd_id), psp->cmd_buf_mem->cmd_id, psp->cmd_buf_mem->resp.status); /* If any firmware (including CAP) load fails under SRIOV, it should @@ -771,16 +794,6 @@ static int psp_load_toc(struct psp_context *psp, return ret; } -static bool psp_boottime_tmr(struct psp_context *psp) -{ - switch (amdgpu_ip_version(psp->adev, MP0_HWIP, 0)) { - case IP_VERSION(13, 0, 6): - return true; - default: - return false; - } -} - /* Set up Trusted Memory Region */ static int psp_tmr_init(struct psp_context *psp) { @@ -807,12 +820,12 @@ static int psp_tmr_init(struct psp_context *psp) psp->fw_pri_buf) { ret = psp_load_toc(psp, &tmr_size); if (ret) { - DRM_ERROR("Failed to load toc\n"); + dev_err(psp->adev->dev, "Failed to load toc\n"); return ret; } } - if (!psp->tmr_bo) { + if (!psp->tmr_bo && !psp->boot_time_tmr) { pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL; ret = amdgpu_bo_create_kernel(psp->adev, tmr_size, PSP_TMR_ALIGNMENT, @@ -855,7 +868,7 @@ static int psp_tmr_load(struct psp_context *psp) psp_prep_tmr_cmd_buf(psp, cmd, psp->tmr_mc_addr, psp->tmr_bo); if (psp->tmr_bo) - DRM_INFO("reserve 0x%lx from 0x%llx for PSP TMR\n", + dev_info(psp->adev->dev, "reserve 0x%lx from 0x%llx for PSP TMR\n", amdgpu_bo_size(psp->tmr_bo), psp->tmr_mc_addr); ret = psp_cmd_submit_buf(psp, NULL, cmd, @@ -1113,7 +1126,7 @@ int psp_reg_program(struct psp_context *psp, enum psp_reg_prog_id reg, psp_prep_reg_prog_cmd_buf(cmd, reg, value); ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); if (ret) - DRM_ERROR("PSP failed to program reg id %d", reg); + dev_err(psp->adev->dev, "PSP failed to program reg id %d\n", reg); release_psp_cmd_buf(psp); @@ -1526,22 +1539,22 @@ static void psp_ras_ta_check_status(struct psp_context *psp) switch (ras_cmd->ras_status) { case TA_RAS_STATUS__ERROR_UNSUPPORTED_IP: dev_warn(psp->adev->dev, - "RAS WARNING: cmd failed due to unsupported ip\n"); + "RAS WARNING: cmd failed due to unsupported ip\n"); break; case TA_RAS_STATUS__ERROR_UNSUPPORTED_ERROR_INJ: dev_warn(psp->adev->dev, - "RAS WARNING: cmd failed due to unsupported error injection\n"); + "RAS WARNING: cmd failed due to unsupported error injection\n"); break; case TA_RAS_STATUS__SUCCESS: break; case TA_RAS_STATUS__TEE_ERROR_ACCESS_DENIED: if (ras_cmd->cmd_id == TA_RAS_COMMAND__TRIGGER_ERROR) dev_warn(psp->adev->dev, - "RAS WARNING: Inject error to critical region is not allowed\n"); + "RAS WARNING: Inject error to critical region is not allowed\n"); break; default: dev_warn(psp->adev->dev, - "RAS WARNING: ras status = 0x%X\n", ras_cmd->ras_status); + "RAS WARNING: ras status = 0x%X\n", ras_cmd->ras_status); break; } } @@ -1565,7 +1578,7 @@ int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id) return ret; if (ras_cmd->if_version > RAS_TA_HOST_IF_VER) { - DRM_WARN("RAS: Unsupported Interface"); + dev_warn(psp->adev->dev, "RAS: Unsupported Interface\n"); return -EINVAL; } @@ -1715,7 +1728,7 @@ int psp_ras_initialize(struct psp_context *psp) psp->ras_context.context.initialized = true; else { if (ras_cmd->ras_status) - dev_warn(psp->adev->dev, "RAS Init Status: 0x%X\n", ras_cmd->ras_status); + dev_warn(adev->dev, "RAS Init Status: 0x%X\n", ras_cmd->ras_status); /* fail to load RAS TA */ psp->ras_context.context.initialized = false; @@ -1779,6 +1792,31 @@ int psp_ras_trigger_error(struct psp_context *psp, return 0; } + +int psp_ras_query_address(struct psp_context *psp, + struct ta_ras_query_address_input *addr_in, + struct ta_ras_query_address_output *addr_out) +{ + struct ta_ras_shared_memory *ras_cmd; + int ret; + + if (!psp->ras_context.context.initialized) + return -EINVAL; + + ras_cmd = (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf; + memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory)); + + ras_cmd->cmd_id = TA_RAS_COMMAND__QUERY_ADDRESS; + ras_cmd->ras_in_message.address = *addr_in; + + ret = psp_ras_invoke(psp, ras_cmd->cmd_id); + if (ret || ras_cmd->ras_status || psp->cmd_buf_mem->resp.status) + return -EINVAL; + + *addr_out = ras_cmd->ras_out_message.address; + + return 0; +} // ras end // HDCP start @@ -1792,6 +1830,10 @@ static int psp_hdcp_initialize(struct psp_context *psp) if (amdgpu_sriov_vf(psp->adev)) return 0; + /* bypass hdcp initialization if dmu is harvested */ + if (!amdgpu_device_has_display_hardware(psp->adev)) + return 0; + if (!psp->hdcp_context.context.bin_desc.size_bytes || !psp->hdcp_context.context.bin_desc.start_addr) { dev_info(psp->adev->dev, "HDCP: optional hdcp ta ucode is not available\n"); @@ -1824,6 +1866,9 @@ int psp_hdcp_invoke(struct psp_context *psp, uint32_t ta_cmd_id) if (amdgpu_sriov_vf(psp->adev)) return 0; + if (!psp->hdcp_context.context.initialized) + return 0; + return psp_ta_invoke(psp, ta_cmd_id, &psp->hdcp_context.context); } @@ -1859,6 +1904,10 @@ static int psp_dtm_initialize(struct psp_context *psp) if (amdgpu_sriov_vf(psp->adev)) return 0; + /* bypass dtm initialization if dmu is harvested */ + if (!amdgpu_device_has_display_hardware(psp->adev)) + return 0; + if (!psp->dtm_context.context.bin_desc.size_bytes || !psp->dtm_context.context.bin_desc.start_addr) { dev_info(psp->adev->dev, "DTM: optional dtm ta ucode is not available\n"); @@ -1891,6 +1940,9 @@ int psp_dtm_invoke(struct psp_context *psp, uint32_t ta_cmd_id) if (amdgpu_sriov_vf(psp->adev)) return 0; + if (!psp->dtm_context.context.initialized) + return 0; + return psp_ta_invoke(psp, ta_cmd_id, &psp->dtm_context.context); } @@ -2025,6 +2077,10 @@ static int psp_securedisplay_initialize(struct psp_context *psp) if (amdgpu_sriov_vf(psp->adev)) return 0; + /* bypass securedisplay initialization if dmu is harvested */ + if (!amdgpu_device_has_display_hardware(psp->adev)) + return 0; + if (!psp->securedisplay_context.context.bin_desc.size_bytes || !psp->securedisplay_context.context.bin_desc.start_addr) { dev_info(psp->adev->dev, "SECUREDISPLAY: securedisplay ta ucode is not available\n"); @@ -2125,19 +2181,14 @@ int amdgpu_psp_wait_for_bootloader(struct amdgpu_device *adev) return ret; } -int amdgpu_psp_query_boot_status(struct amdgpu_device *adev) +bool amdgpu_psp_get_ras_capability(struct psp_context *psp) { - struct psp_context *psp = &adev->psp; - int ret = 0; - - if (amdgpu_sriov_vf(adev) || (adev->flags & AMD_IS_APU)) - return 0; - if (psp->funcs && - psp->funcs->query_boot_status) - ret = psp->funcs->query_boot_status(psp); - - return ret; + psp->funcs->get_ras_capability) { + return psp->funcs->get_ras_capability(psp); + } else { + return false; + } } static int psp_hw_start(struct psp_context *psp) @@ -2150,7 +2201,7 @@ static int psp_hw_start(struct psp_context *psp) (psp->funcs->bootloader_load_kdb != NULL)) { ret = psp_bootloader_load_kdb(psp); if (ret) { - DRM_ERROR("PSP load kdb failed!\n"); + dev_err(adev->dev, "PSP load kdb failed!\n"); return ret; } } @@ -2159,7 +2210,7 @@ static int psp_hw_start(struct psp_context *psp) (psp->funcs->bootloader_load_spl != NULL)) { ret = psp_bootloader_load_spl(psp); if (ret) { - DRM_ERROR("PSP load spl failed!\n"); + dev_err(adev->dev, "PSP load spl failed!\n"); return ret; } } @@ -2168,7 +2219,7 @@ static int psp_hw_start(struct psp_context *psp) (psp->funcs->bootloader_load_sysdrv != NULL)) { ret = psp_bootloader_load_sysdrv(psp); if (ret) { - DRM_ERROR("PSP load sys drv failed!\n"); + dev_err(adev->dev, "PSP load sys drv failed!\n"); return ret; } } @@ -2177,7 +2228,7 @@ static int psp_hw_start(struct psp_context *psp) (psp->funcs->bootloader_load_soc_drv != NULL)) { ret = psp_bootloader_load_soc_drv(psp); if (ret) { - DRM_ERROR("PSP load soc drv failed!\n"); + dev_err(adev->dev, "PSP load soc drv failed!\n"); return ret; } } @@ -2186,7 +2237,7 @@ static int psp_hw_start(struct psp_context *psp) (psp->funcs->bootloader_load_intf_drv != NULL)) { ret = psp_bootloader_load_intf_drv(psp); if (ret) { - DRM_ERROR("PSP load intf drv failed!\n"); + dev_err(adev->dev, "PSP load intf drv failed!\n"); return ret; } } @@ -2195,7 +2246,7 @@ static int psp_hw_start(struct psp_context *psp) (psp->funcs->bootloader_load_dbg_drv != NULL)) { ret = psp_bootloader_load_dbg_drv(psp); if (ret) { - DRM_ERROR("PSP load dbg drv failed!\n"); + dev_err(adev->dev, "PSP load dbg drv failed!\n"); return ret; } } @@ -2204,7 +2255,7 @@ static int psp_hw_start(struct psp_context *psp) (psp->funcs->bootloader_load_ras_drv != NULL)) { ret = psp_bootloader_load_ras_drv(psp); if (ret) { - DRM_ERROR("PSP load ras_drv failed!\n"); + dev_err(adev->dev, "PSP load ras_drv failed!\n"); return ret; } } @@ -2213,7 +2264,7 @@ static int psp_hw_start(struct psp_context *psp) (psp->funcs->bootloader_load_sos != NULL)) { ret = psp_bootloader_load_sos(psp); if (ret) { - DRM_ERROR("PSP load sos failed!\n"); + dev_err(adev->dev, "PSP load sos failed!\n"); return ret; } } @@ -2221,17 +2272,17 @@ static int psp_hw_start(struct psp_context *psp) ret = psp_ring_create(psp, PSP_RING_TYPE__KM); if (ret) { - DRM_ERROR("PSP create ring failed!\n"); + dev_err(adev->dev, "PSP create ring failed!\n"); return ret; } if (amdgpu_sriov_vf(adev) && amdgpu_in_reset(adev)) goto skip_pin_bo; - if (!psp_boottime_tmr(psp)) { + if (!psp->boot_time_tmr || psp->autoload_supported) { ret = psp_tmr_init(psp); if (ret) { - DRM_ERROR("PSP tmr init failed!\n"); + dev_err(adev->dev, "PSP tmr init failed!\n"); return ret; } } @@ -2248,10 +2299,12 @@ skip_pin_bo: return ret; } - ret = psp_tmr_load(psp); - if (ret) { - DRM_ERROR("PSP load tmr failed!\n"); - return ret; + if (!psp->boot_time_tmr || !psp->autoload_supported) { + ret = psp_tmr_load(psp); + if (ret) { + dev_err(adev->dev, "PSP load tmr failed!\n"); + return ret; + } } return 0; @@ -2462,6 +2515,9 @@ static int psp_get_fw_type(struct amdgpu_firmware_info *ucode, case AMDGPU_UCODE_ID_P2S_TABLE: *type = GFX_FW_TYPE_P2S_TABLE; break; + case AMDGPU_UCODE_ID_JPEG_RAM: + *type = GFX_FW_TYPE_JPEG_RAM; + break; case AMDGPU_UCODE_ID_MAXIMUM: default: return -EINVAL; @@ -2518,7 +2574,8 @@ static void psp_print_fw_hdr(struct psp_context *psp, } } -static int psp_prep_load_ip_fw_cmd_buf(struct amdgpu_firmware_info *ucode, +static int psp_prep_load_ip_fw_cmd_buf(struct psp_context *psp, + struct amdgpu_firmware_info *ucode, struct psp_gfx_cmd_resp *cmd) { int ret; @@ -2531,7 +2588,7 @@ static int psp_prep_load_ip_fw_cmd_buf(struct amdgpu_firmware_info *ucode, ret = psp_get_fw_type(ucode, &cmd->cmd.cmd_load_ip_fw.fw_type); if (ret) - DRM_ERROR("Unknown firmware type\n"); + dev_err(psp->adev->dev, "Unknown firmware type\n"); return ret; } @@ -2542,7 +2599,7 @@ int psp_execute_ip_fw_load(struct psp_context *psp, int ret = 0; struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp); - ret = psp_prep_load_ip_fw_cmd_buf(ucode, cmd); + ret = psp_prep_load_ip_fw_cmd_buf(psp, ucode, cmd); if (!ret) { ret = psp_cmd_submit_buf(psp, ucode, cmd, psp->fence_buf_mc_addr); @@ -2601,13 +2658,13 @@ static int psp_load_smu_fw(struct psp_context *psp) amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(11, 0, 2)))) { ret = amdgpu_dpm_set_mp1_state(adev, PP_MP1_STATE_UNLOAD); if (ret) - DRM_WARN("Failed to set MP1 state prepare for reload\n"); + dev_err(adev->dev, "Failed to set MP1 state prepare for reload\n"); } ret = psp_execute_ip_fw_load(psp, ucode); if (ret) - DRM_ERROR("PSP load smu failed!\n"); + dev_err(adev->dev, "PSP load smu failed!\n"); return ret; } @@ -2712,7 +2769,7 @@ static int psp_load_non_psp_fw(struct psp_context *psp) adev->virt.autoload_ucode_id : AMDGPU_UCODE_ID_RLC_G)) { ret = psp_rlc_autoload_start(psp); if (ret) { - DRM_ERROR("Failed to start rlc autoload\n"); + dev_err(adev->dev, "Failed to start rlc autoload\n"); return ret; } } @@ -2734,7 +2791,7 @@ static int psp_load_fw(struct amdgpu_device *adev) ret = psp_ring_init(psp, PSP_RING_TYPE__KM); if (ret) { - DRM_ERROR("PSP ring init failed!\n"); + dev_err(adev->dev, "PSP ring init failed!\n"); goto failed; } } @@ -2749,13 +2806,13 @@ static int psp_load_fw(struct amdgpu_device *adev) ret = psp_asd_initialize(psp); if (ret) { - DRM_ERROR("PSP load asd failed!\n"); + dev_err(adev->dev, "PSP load asd failed!\n"); goto failed1; } ret = psp_rl_load(adev); if (ret) { - DRM_ERROR("PSP load RL failed!\n"); + dev_err(adev->dev, "PSP load RL failed!\n"); goto failed1; } @@ -2775,7 +2832,7 @@ static int psp_load_fw(struct amdgpu_device *adev) ret = psp_ras_initialize(psp); if (ret) dev_err(psp->adev->dev, - "RAS: Failed to initialize RAS\n"); + "RAS: Failed to initialize RAS\n"); ret = psp_hdcp_initialize(psp); if (ret) @@ -2828,7 +2885,7 @@ static int psp_hw_init(void *handle) ret = psp_load_fw(adev); if (ret) { - DRM_ERROR("PSP firmware loading failed\n"); + dev_err(adev->dev, "PSP firmware loading failed\n"); goto failed; } @@ -2875,7 +2932,7 @@ static int psp_suspend(void *handle) psp->xgmi_context.context.initialized) { ret = psp_xgmi_terminate(psp); if (ret) { - DRM_ERROR("Failed to terminate xgmi ta\n"); + dev_err(adev->dev, "Failed to terminate xgmi ta\n"); goto out; } } @@ -2883,46 +2940,46 @@ static int psp_suspend(void *handle) if (psp->ta_fw) { ret = psp_ras_terminate(psp); if (ret) { - DRM_ERROR("Failed to terminate ras ta\n"); + dev_err(adev->dev, "Failed to terminate ras ta\n"); goto out; } ret = psp_hdcp_terminate(psp); if (ret) { - DRM_ERROR("Failed to terminate hdcp ta\n"); + dev_err(adev->dev, "Failed to terminate hdcp ta\n"); goto out; } ret = psp_dtm_terminate(psp); if (ret) { - DRM_ERROR("Failed to terminate dtm ta\n"); + dev_err(adev->dev, "Failed to terminate dtm ta\n"); goto out; } ret = psp_rap_terminate(psp); if (ret) { - DRM_ERROR("Failed to terminate rap ta\n"); + dev_err(adev->dev, "Failed to terminate rap ta\n"); goto out; } ret = psp_securedisplay_terminate(psp); if (ret) { - DRM_ERROR("Failed to terminate securedisplay ta\n"); + dev_err(adev->dev, "Failed to terminate securedisplay ta\n"); goto out; } } ret = psp_asd_terminate(psp); if (ret) { - DRM_ERROR("Failed to terminate asd\n"); + dev_err(adev->dev, "Failed to terminate asd\n"); goto out; } ret = psp_tmr_terminate(psp); if (ret) { - DRM_ERROR("Failed to terminate tmr\n"); + dev_err(adev->dev, "Failed to terminate tmr\n"); goto out; } ret = psp_ring_stop(psp, PSP_RING_TYPE__KM); if (ret) - DRM_ERROR("PSP ring stop failed\n"); + dev_err(adev->dev, "PSP ring stop failed\n"); out: return ret; @@ -2934,12 +2991,12 @@ static int psp_resume(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct psp_context *psp = &adev->psp; - DRM_INFO("PSP is resuming...\n"); + dev_info(adev->dev, "PSP is resuming...\n"); if (psp->mem_train_ctx.enable_mem_training) { ret = psp_mem_training(psp, PSP_MEM_TRAIN_RESUME); if (ret) { - DRM_ERROR("Failed to process memory training!\n"); + dev_err(adev->dev, "Failed to process memory training!\n"); return ret; } } @@ -2956,7 +3013,7 @@ static int psp_resume(void *handle) ret = psp_asd_initialize(psp); if (ret) { - DRM_ERROR("PSP load asd failed!\n"); + dev_err(adev->dev, "PSP load asd failed!\n"); goto failed; } @@ -2980,7 +3037,7 @@ static int psp_resume(void *handle) ret = psp_ras_initialize(psp); if (ret) dev_err(psp->adev->dev, - "RAS: Failed to initialize RAS\n"); + "RAS: Failed to initialize RAS\n"); ret = psp_hdcp_initialize(psp); if (ret) @@ -3008,7 +3065,7 @@ static int psp_resume(void *handle) return 0; failed: - DRM_ERROR("PSP resume failed\n"); + dev_err(adev->dev, "PSP resume failed\n"); mutex_unlock(&adev->firmware.mutex); return ret; } @@ -3069,9 +3126,11 @@ int psp_ring_cmd_submit(struct psp_context *psp, write_frame = ring_buffer_start + (psp_write_ptr_reg / rb_frame_size_dw); /* Check invalid write_frame ptr address */ if ((write_frame < ring_buffer_start) || (ring_buffer_end < write_frame)) { - DRM_ERROR("ring_buffer_start = %p; ring_buffer_end = %p; write_frame = %p\n", - ring_buffer_start, ring_buffer_end, write_frame); - DRM_ERROR("write_frame is pointing to address out of bounds\n"); + dev_err(adev->dev, + "ring_buffer_start = %p; ring_buffer_end = %p; write_frame = %p\n", + ring_buffer_start, ring_buffer_end, write_frame); + dev_err(adev->dev, + "write_frame is pointing to address out of bounds\n"); return -EINVAL; } @@ -3597,7 +3656,7 @@ static ssize_t psp_usbc_pd_fw_sysfs_read(struct device *dev, int ret; if (!adev->ip_blocks[AMD_IP_BLOCK_TYPE_PSP].status.late_initialized) { - DRM_INFO("PSP block is not ready yet."); + dev_info(adev->dev, "PSP block is not ready yet\n."); return -EBUSY; } @@ -3606,7 +3665,7 @@ static ssize_t psp_usbc_pd_fw_sysfs_read(struct device *dev, mutex_unlock(&adev->psp.mutex); if (ret) { - DRM_ERROR("Failed to read USBC PD FW, err = %d", ret); + dev_err(adev->dev, "Failed to read USBC PD FW, err = %d\n", ret); return ret; } @@ -3628,7 +3687,7 @@ static ssize_t psp_usbc_pd_fw_sysfs_write(struct device *dev, void *fw_pri_cpu_addr; if (!adev->ip_blocks[AMD_IP_BLOCK_TYPE_PSP].status.late_initialized) { - DRM_INFO("PSP block is not ready yet."); + dev_err(adev->dev, "PSP block is not ready yet."); return -EBUSY; } @@ -3661,7 +3720,7 @@ rel_buf: release_firmware(usbc_pd_fw); fail: if (ret) { - DRM_ERROR("Failed to load USBC PD FW, err = %d", ret); + dev_err(adev->dev, "Failed to load USBC PD FW, err = %d", ret); count = ret; } @@ -3708,7 +3767,7 @@ static ssize_t amdgpu_psp_vbflash_write(struct file *filp, struct kobject *kobj, /* Safeguard against memory drain */ if (adev->psp.vbflash_image_size > AMD_VBIOS_FILE_MAX_SIZE_B) { - dev_err(adev->dev, "File size cannot exceed %u", AMD_VBIOS_FILE_MAX_SIZE_B); + dev_err(adev->dev, "File size cannot exceed %u\n", AMD_VBIOS_FILE_MAX_SIZE_B); kvfree(adev->psp.vbflash_tmp_buf); adev->psp.vbflash_tmp_buf = NULL; adev->psp.vbflash_image_size = 0; @@ -3727,7 +3786,7 @@ static ssize_t amdgpu_psp_vbflash_write(struct file *filp, struct kobject *kobj, adev->psp.vbflash_image_size += count; mutex_unlock(&adev->psp.mutex); - dev_dbg(adev->dev, "IFWI staged for update"); + dev_dbg(adev->dev, "IFWI staged for update\n"); return count; } @@ -3747,7 +3806,7 @@ static ssize_t amdgpu_psp_vbflash_read(struct file *filp, struct kobject *kobj, if (adev->psp.vbflash_image_size == 0) return -EINVAL; - dev_dbg(adev->dev, "PSP IFWI flash process initiated"); + dev_dbg(adev->dev, "PSP IFWI flash process initiated\n"); ret = amdgpu_bo_create_kernel(adev, adev->psp.vbflash_image_size, AMDGPU_GPU_PAGE_SIZE, @@ -3772,11 +3831,11 @@ rel_buf: adev->psp.vbflash_image_size = 0; if (ret) { - dev_err(adev->dev, "Failed to load IFWI, err = %d", ret); + dev_err(adev->dev, "Failed to load IFWI, err = %d\n", ret); return ret; } - dev_dbg(adev->dev, "PSP IFWI flash process done"); + dev_dbg(adev->dev, "PSP IFWI flash process done\n"); return 0; } @@ -3930,3 +3989,11 @@ const struct amdgpu_ip_block_version psp_v13_0_4_ip_block = { .rev = 4, .funcs = &psp_ip_funcs, }; + +const struct amdgpu_ip_block_version psp_v14_0_ip_block = { + .type = AMD_IP_BLOCK_TYPE_PSP, + .major = 14, + .minor = 0, + .rev = 0, + .funcs = &psp_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index c4d9cbde55..ee16f134ae 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -134,7 +134,7 @@ struct psp_funcs { int (*update_spirom)(struct psp_context *psp, uint64_t fw_pri_mc_addr); int (*vbflash_stat)(struct psp_context *psp); int (*fatal_error_recovery_quirk)(struct psp_context *psp); - int (*query_boot_status)(struct psp_context *psp); + bool (*get_ras_capability)(struct psp_context *psp); }; struct ta_funcs { @@ -203,7 +203,7 @@ struct psp_ras_context { #define GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES 0x1000 #define GDDR6_MEM_TRAINING_OFFSET 0x8000 /*Define the VRAM size that will be encroached by BIST training.*/ -#define GDDR6_MEM_TRAINING_ENCROACHED_SIZE 0x2000000 +#define BIST_MEM_TRAINING_ENCROACHED_SIZE 0x2000000 enum psp_memory_training_init_flag { PSP_MEM_TRAIN_NOT_SUPPORT = 0x0, @@ -364,6 +364,8 @@ struct psp_context { atomic_t fence_value; /* flag to mark whether gfx fw autoload is supported or not */ bool autoload_supported; + /* flag to mark whether psp use runtime TMR or boottime TMR */ + bool boot_time_tmr; /* flag to mark whether df cstate management centralized to PMFW */ bool pmfw_centralized_cstate_management; @@ -463,6 +465,7 @@ extern const struct amdgpu_ip_block_version psp_v11_0_8_ip_block; extern const struct amdgpu_ip_block_version psp_v12_0_ip_block; extern const struct amdgpu_ip_block_version psp_v13_0_ip_block; extern const struct amdgpu_ip_block_version psp_v13_0_4_ip_block; +extern const struct amdgpu_ip_block_version psp_v14_0_ip_block; extern int psp_wait_for(struct psp_context *psp, uint32_t reg_index, uint32_t field_val, uint32_t mask, bool check_changed); @@ -502,6 +505,9 @@ int psp_ras_enable_features(struct psp_context *psp, int psp_ras_trigger_error(struct psp_context *psp, struct ta_ras_trigger_error_input *info, uint32_t instance_mask); int psp_ras_terminate(struct psp_context *psp); +int psp_ras_query_address(struct psp_context *psp, + struct ta_ras_query_address_input *addr_in, + struct ta_ras_query_address_output *addr_out); int psp_hdcp_invoke(struct psp_context *psp, uint32_t ta_cmd_id); int psp_dtm_invoke(struct psp_context *psp, uint32_t ta_cmd_id); @@ -538,7 +544,5 @@ int psp_spatial_partition(struct psp_context *psp, int mode); int is_psp_fw_valid(struct psp_bin_desc bin); int amdgpu_psp_wait_for_bootloader(struct amdgpu_device *adev); - -int amdgpu_psp_query_boot_status(struct amdgpu_device *adev); - +bool amdgpu_psp_get_ras_capability(struct psp_context *psp); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 31823a30de..8ebab6f22e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -39,6 +39,7 @@ #include "nbio_v7_9.h" #include "atom.h" #include "amdgpu_reset.h" +#include "amdgpu_psp.h" #ifdef CONFIG_X86_MCE_AMD #include <asm/mce.h> @@ -73,6 +74,8 @@ const char *ras_block_string[] = { "mca", "vcn", "jpeg", + "ih", + "mpio", }; const char *ras_mca_block_string[] = { @@ -94,7 +97,8 @@ const char *get_ras_block_str(struct ras_common_if *ras_block) if (!ras_block) return "NULL"; - if (ras_block->block >= AMDGPU_RAS_BLOCK_COUNT) + if (ras_block->block >= AMDGPU_RAS_BLOCK_COUNT || + ras_block->block >= ARRAY_SIZE(ras_block_string)) return "OUT OF RANGE"; if (ras_block->block == AMDGPU_RAS_BLOCK__MCA) @@ -116,6 +120,8 @@ const char *get_ras_block_str(struct ras_common_if *ras_block) /* typical ECC bad page rate is 1 bad page per 100MB VRAM */ #define RAS_BAD_PAGE_COVER (100 * 1024 * 1024ULL) +#define MAX_UMC_POISON_POLLING_TIME_ASYNC 100 //ms + enum amdgpu_ras_retire_page_reservation { AMDGPU_RAS_RETIRE_PAGE_RESERVED, AMDGPU_RAS_RETIRE_PAGE_PENDING, @@ -628,8 +634,12 @@ static ssize_t amdgpu_ras_sysfs_read(struct device *dev, dev_warn(obj->adev->dev, "Failed to reset error counter and error status"); } - return sysfs_emit(buf, "%s: %lu\n%s: %lu\n", "ue", info.ue_count, - "ce", info.ce_count); + if (info.head.block == AMDGPU_RAS_BLOCK__UMC) + return sysfs_emit(buf, "%s: %lu\n%s: %lu\n%s: %lu\n", "ue", info.ue_count, + "ce", info.ce_count, "de", info.de_count); + else + return sysfs_emit(buf, "%s: %lu\n%s: %lu\n", "ue", info.ue_count, + "ce", info.ce_count); } /* obj begin */ @@ -1036,7 +1046,8 @@ static void amdgpu_ras_error_print_error_data(struct amdgpu_device *adev, struct ras_manager *ras_mgr, struct ras_err_data *err_data, const char *blk_name, - bool is_ue) + bool is_ue, + bool is_de) { struct amdgpu_smuio_mcm_config_info *mcm_info; struct ras_err_node *err_node; @@ -1065,25 +1076,50 @@ static void amdgpu_ras_error_print_error_data(struct amdgpu_device *adev, } } else { - for_each_ras_error(err_node, err_data) { - err_info = &err_node->err_info; - mcm_info = &err_info->mcm_info; - if (err_info->ce_count) { + if (is_de) { + for_each_ras_error(err_node, err_data) { + err_info = &err_node->err_info; + mcm_info = &err_info->mcm_info; + if (err_info->de_count) { + dev_info(adev->dev, "socket: %d, die: %d, " + "%lld new deferred hardware errors detected in %s block\n", + mcm_info->socket_id, + mcm_info->die_id, + err_info->de_count, + blk_name); + } + } + + for_each_ras_error(err_node, &ras_mgr->err_data) { + err_info = &err_node->err_info; + mcm_info = &err_info->mcm_info; dev_info(adev->dev, "socket: %d, die: %d, " - "%lld new correctable hardware errors detected in %s block\n", - mcm_info->socket_id, - mcm_info->die_id, - err_info->ce_count, - blk_name); + "%lld deferred hardware errors detected in total in %s block\n", + mcm_info->socket_id, mcm_info->die_id, + err_info->de_count, blk_name); + } + } else { + for_each_ras_error(err_node, err_data) { + err_info = &err_node->err_info; + mcm_info = &err_info->mcm_info; + if (err_info->ce_count) { + dev_info(adev->dev, "socket: %d, die: %d, " + "%lld new correctable hardware errors detected in %s block\n", + mcm_info->socket_id, + mcm_info->die_id, + err_info->ce_count, + blk_name); + } } - } - for_each_ras_error(err_node, &ras_mgr->err_data) { - err_info = &err_node->err_info; - mcm_info = &err_info->mcm_info; - dev_info(adev->dev, "socket: %d, die: %d, " - "%lld correctable hardware errors detected in total in %s block\n", - mcm_info->socket_id, mcm_info->die_id, err_info->ce_count, blk_name); + for_each_ras_error(err_node, &ras_mgr->err_data) { + err_info = &err_node->err_info; + mcm_info = &err_info->mcm_info; + dev_info(adev->dev, "socket: %d, die: %d, " + "%lld correctable hardware errors detected in total in %s block\n", + mcm_info->socket_id, mcm_info->die_id, + err_info->ce_count, blk_name); + } } } } @@ -1102,7 +1138,8 @@ static void amdgpu_ras_error_generate_report(struct amdgpu_device *adev, if (err_data->ce_count) { if (err_data_has_source_info(err_data)) { - amdgpu_ras_error_print_error_data(adev, ras_mgr, err_data, blk_name, false); + amdgpu_ras_error_print_error_data(adev, ras_mgr, err_data, + blk_name, false, false); } else if (!adev->aid_mask && adev->smuio.funcs && adev->smuio.funcs->get_socket_id && @@ -1124,7 +1161,8 @@ static void amdgpu_ras_error_generate_report(struct amdgpu_device *adev, if (err_data->ue_count) { if (err_data_has_source_info(err_data)) { - amdgpu_ras_error_print_error_data(adev, ras_mgr, err_data, blk_name, true); + amdgpu_ras_error_print_error_data(adev, ras_mgr, err_data, + blk_name, true, false); } else if (!adev->aid_mask && adev->smuio.funcs && adev->smuio.funcs->get_socket_id && @@ -1144,6 +1182,28 @@ static void amdgpu_ras_error_generate_report(struct amdgpu_device *adev, } } + if (err_data->de_count) { + if (err_data_has_source_info(err_data)) { + amdgpu_ras_error_print_error_data(adev, ras_mgr, err_data, + blk_name, false, true); + } else if (!adev->aid_mask && + adev->smuio.funcs && + adev->smuio.funcs->get_socket_id && + adev->smuio.funcs->get_die_id) { + dev_info(adev->dev, "socket: %d, die: %d " + "%ld deferred hardware errors " + "detected in %s block\n", + adev->smuio.funcs->get_socket_id(adev), + adev->smuio.funcs->get_die_id(adev), + ras_mgr->err_data.de_count, + blk_name); + } else { + dev_info(adev->dev, "%ld deferred hardware errors " + "detected in %s block\n", + ras_mgr->err_data.de_count, + blk_name); + } + } } static void amdgpu_rasmgr_error_data_statistic_update(struct ras_manager *obj, struct ras_err_data *err_data) @@ -1154,7 +1214,8 @@ static void amdgpu_rasmgr_error_data_statistic_update(struct ras_manager *obj, s if (err_data_has_source_info(err_data)) { for_each_ras_error(err_node, err_data) { err_info = &err_node->err_info; - + amdgpu_ras_error_statistic_de_count(&obj->err_data, + &err_info->mcm_info, NULL, err_info->de_count); amdgpu_ras_error_statistic_ce_count(&obj->err_data, &err_info->mcm_info, NULL, err_info->ce_count); amdgpu_ras_error_statistic_ue_count(&obj->err_data, @@ -1164,9 +1225,72 @@ static void amdgpu_rasmgr_error_data_statistic_update(struct ras_manager *obj, s /* for legacy asic path which doesn't has error source info */ obj->err_data.ue_count += err_data->ue_count; obj->err_data.ce_count += err_data->ce_count; + obj->err_data.de_count += err_data->de_count; } } +static struct ras_manager *get_ras_manager(struct amdgpu_device *adev, enum amdgpu_ras_block blk) +{ + struct ras_common_if head; + + memset(&head, 0, sizeof(head)); + head.block = blk; + + return amdgpu_ras_find_obj(adev, &head); +} + +int amdgpu_ras_bind_aca(struct amdgpu_device *adev, enum amdgpu_ras_block blk, + const struct aca_info *aca_info, void *data) +{ + struct ras_manager *obj; + + obj = get_ras_manager(adev, blk); + if (!obj) + return -EINVAL; + + return amdgpu_aca_add_handle(adev, &obj->aca_handle, ras_block_str(blk), aca_info, data); +} + +int amdgpu_ras_unbind_aca(struct amdgpu_device *adev, enum amdgpu_ras_block blk) +{ + struct ras_manager *obj; + + obj = get_ras_manager(adev, blk); + if (!obj) + return -EINVAL; + + amdgpu_aca_remove_handle(&obj->aca_handle); + + return 0; +} + +static int amdgpu_aca_log_ras_error_data(struct amdgpu_device *adev, enum amdgpu_ras_block blk, + enum aca_error_type type, struct ras_err_data *err_data) +{ + struct ras_manager *obj; + + obj = get_ras_manager(adev, blk); + if (!obj) + return -EINVAL; + + return amdgpu_aca_get_error_data(adev, &obj->aca_handle, type, err_data); +} + +ssize_t amdgpu_ras_aca_sysfs_read(struct device *dev, struct device_attribute *attr, + struct aca_handle *handle, char *buf, void *data) +{ + struct ras_manager *obj = container_of(handle, struct ras_manager, aca_handle); + struct ras_query_if info = { + .head = obj->head, + }; + + if (amdgpu_ras_query_error_status(obj->adev, &info)) + return -EINVAL; + + return sysfs_emit(buf, "%s: %lu\n%s: %lu\n", "ue", info.ue_count, + "ce", info.ce_count); +} + static int amdgpu_ras_query_error_status_helper(struct amdgpu_device *adev, struct ras_query_if *info, struct ras_err_data *err_data, @@ -1174,6 +1298,7 @@ static int amdgpu_ras_query_error_status_helper(struct amdgpu_device *adev, { enum amdgpu_ras_block blk = info ? info->head.block : AMDGPU_RAS_BLOCK_COUNT; struct amdgpu_ras_block_object *block_obj = NULL; + int ret; if (blk == AMDGPU_RAS_BLOCK_COUNT) return -EINVAL; @@ -1203,9 +1328,19 @@ static int amdgpu_ras_query_error_status_helper(struct amdgpu_device *adev, } } } else { - /* FIXME: add code to check return value later */ - amdgpu_mca_smu_log_ras_error(adev, blk, AMDGPU_MCA_ERROR_TYPE_UE, err_data); - amdgpu_mca_smu_log_ras_error(adev, blk, AMDGPU_MCA_ERROR_TYPE_CE, err_data); + if (amdgpu_aca_is_enabled(adev)) { + ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_UE, err_data); + if (ret) + return ret; + + ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_CE, err_data); + if (ret) + return ret; + } else { + /* FIXME: add code to check return value later */ + amdgpu_mca_smu_log_ras_error(adev, blk, AMDGPU_MCA_ERROR_TYPE_UE, err_data); + amdgpu_mca_smu_log_ras_error(adev, blk, AMDGPU_MCA_ERROR_TYPE_CE, err_data); + } } return 0; @@ -1239,6 +1374,7 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev, struct ras_query_i info->ue_count = obj->err_data.ue_count; info->ce_count = obj->err_data.ce_count; + info->de_count = obj->err_data.de_count; amdgpu_ras_error_generate_report(adev, info, &err_data); @@ -1254,6 +1390,7 @@ int amdgpu_ras_reset_error_count(struct amdgpu_device *adev, struct amdgpu_ras_block_object *block_obj = amdgpu_ras_get_ras_block(adev, block, 0); struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs; + const struct aca_smu_funcs *smu_funcs = adev->aca.smu_funcs; struct amdgpu_hive_info *hive; int hive_ras_recovery = 0; @@ -1264,7 +1401,7 @@ int amdgpu_ras_reset_error_count(struct amdgpu_device *adev, } if (!amdgpu_ras_is_supported(adev, block) || - !amdgpu_ras_get_mca_debug_mode(adev)) + !amdgpu_ras_get_aca_debug_mode(adev)) return -EOPNOTSUPP; hive = amdgpu_get_xgmi_hive(adev); @@ -1276,7 +1413,8 @@ int amdgpu_ras_reset_error_count(struct amdgpu_device *adev, /* skip ras error reset in gpu reset */ if ((amdgpu_in_reset(adev) || atomic_read(&ras->in_recovery) || hive_ras_recovery) && - mca_funcs && mca_funcs->mca_set_debug_mode) + ((smu_funcs && smu_funcs->set_debug_mode) || + (mca_funcs && mca_funcs->mca_set_debug_mode))) return -EOPNOTSUPP; if (block_obj->hw_ops->reset_ras_error_count) @@ -1772,7 +1910,10 @@ void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev) } } - amdgpu_mca_smu_debugfs_init(adev, dir); + if (amdgpu_aca_is_enabled(adev)) + amdgpu_aca_smu_debugfs_init(adev, dir); + else + amdgpu_mca_smu_debugfs_init(adev, dir); } /* debugfs end */ @@ -1900,7 +2041,7 @@ static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager * } } - amdgpu_umc_poison_handler(adev, false); + amdgpu_umc_poison_handler(adev, obj->head.block, false); if (block_obj->hw_ops && block_obj->hw_ops->handle_poison_consumption) poison_stat = block_obj->hw_ops->handle_poison_consumption(adev); @@ -1951,6 +2092,7 @@ static void amdgpu_ras_interrupt_umc_handler(struct ras_manager *obj, */ obj->err_data.ue_count += err_data.ue_count; obj->err_data.ce_count += err_data.ce_count; + obj->err_data.de_count += err_data.de_count; } amdgpu_ras_error_data_fini(&err_data); @@ -2297,6 +2439,18 @@ static void amdgpu_ras_do_recovery(struct work_struct *work) ras->gpu_reset_flags &= ~AMDGPU_RAS_GPU_RESET_MODE1_RESET; set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); + /* For any RAS error that needs a full reset to + * recover, set the fatal error status + */ + if (hive) { + list_for_each_entry(remote_adev, + &hive->device_list, + gmc.xgmi.head) + amdgpu_ras_set_fed(remote_adev, + true); + } else { + amdgpu_ras_set_fed(adev, true); + } psp_fatal_error_recovery_quirk(&adev->psp); } } @@ -2520,6 +2674,32 @@ static void amdgpu_ras_validate_threshold(struct amdgpu_device *adev, } } +static int amdgpu_ras_page_retirement_thread(void *param) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)param; + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + + while (!kthread_should_stop()) { + + wait_event_interruptible(con->page_retirement_wq, + kthread_should_stop() || + atomic_read(&con->page_retirement_req_cnt)); + + if (kthread_should_stop()) + break; + + dev_info(adev->dev, "Start processing page retirement. request:%d\n", + atomic_read(&con->page_retirement_req_cnt)); + + atomic_dec(&con->page_retirement_req_cnt); + + amdgpu_umc_bad_page_polling_timeout(adev, + false, MAX_UMC_POISON_POLLING_TIME_ASYNC); + } + + return 0; +} + int amdgpu_ras_recovery_init(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); @@ -2583,6 +2763,16 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev) } } + mutex_init(&con->page_retirement_lock); + init_waitqueue_head(&con->page_retirement_wq); + atomic_set(&con->page_retirement_req_cnt, 0); + con->page_retirement_thread = + kthread_run(amdgpu_ras_page_retirement_thread, adev, "umc_page_retirement"); + if (IS_ERR(con->page_retirement_thread)) { + con->page_retirement_thread = NULL; + dev_warn(adev->dev, "Failed to create umc_page_retirement thread!!!\n"); + } + #ifdef CONFIG_X86_MCE_AMD if ((adev->asic_type == CHIP_ALDEBARAN) && (adev->gmc.xgmi.connected_to_cpu)) @@ -2618,6 +2808,11 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev) if (!data) return 0; + if (con->page_retirement_thread) + kthread_stop(con->page_retirement_thread); + + atomic_set(&con->page_retirement_req_cnt, 0); + cancel_work_sync(&con->recovery_work); mutex_lock(&con->recovery_lock); @@ -2679,6 +2874,87 @@ static void amdgpu_ras_get_quirks(struct amdgpu_device *adev) adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__GFX); } +/* Query ras capablity via atomfirmware interface */ +static void amdgpu_ras_query_ras_capablity_from_vbios(struct amdgpu_device *adev) +{ + /* mem_ecc cap */ + if (amdgpu_atomfirmware_mem_ecc_supported(adev)) { + dev_info(adev->dev, "MEM ECC is active.\n"); + adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__UMC | + 1 << AMDGPU_RAS_BLOCK__DF); + } else { + dev_info(adev->dev, "MEM ECC is not presented.\n"); + } + + /* sram_ecc cap */ + if (amdgpu_atomfirmware_sram_ecc_supported(adev)) { + dev_info(adev->dev, "SRAM ECC is active.\n"); + if (!amdgpu_sriov_vf(adev)) + adev->ras_hw_enabled |= ~(1 << AMDGPU_RAS_BLOCK__UMC | + 1 << AMDGPU_RAS_BLOCK__DF); + else + adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__PCIE_BIF | + 1 << AMDGPU_RAS_BLOCK__SDMA | + 1 << AMDGPU_RAS_BLOCK__GFX); + + /* + * VCN/JPEG RAS can be supported on both bare metal and + * SRIOV environment + */ + if (amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(2, 6, 0) || + amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(4, 0, 0) || + amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(4, 0, 3)) + adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__VCN | + 1 << AMDGPU_RAS_BLOCK__JPEG); + else + adev->ras_hw_enabled &= ~(1 << AMDGPU_RAS_BLOCK__VCN | + 1 << AMDGPU_RAS_BLOCK__JPEG); + + /* + * XGMI RAS is not supported if xgmi num physical nodes + * is zero + */ + if (!adev->gmc.xgmi.num_physical_nodes) + adev->ras_hw_enabled &= ~(1 << AMDGPU_RAS_BLOCK__XGMI_WAFL); + } else { + dev_info(adev->dev, "SRAM ECC is not presented.\n"); + } +} + +/* Query poison mode from umc/df IP callbacks */ +static void amdgpu_ras_query_poison_mode(struct amdgpu_device *adev) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + bool df_poison, umc_poison; + + /* poison setting is useless on SRIOV guest */ + if (amdgpu_sriov_vf(adev) || !con) + return; + + /* Init poison supported flag, the default value is false */ + if (adev->gmc.xgmi.connected_to_cpu || + adev->gmc.is_app_apu) { + /* enabled by default when GPU is connected to CPU */ + con->poison_supported = true; + } else if (adev->df.funcs && + adev->df.funcs->query_ras_poison_mode && + adev->umc.ras && + adev->umc.ras->query_ras_poison_mode) { + df_poison = + adev->df.funcs->query_ras_poison_mode(adev); + umc_poison = + adev->umc.ras->query_ras_poison_mode(adev); + + /* Only poison is set in both DF and UMC, we can support it */ + if (df_poison && umc_poison) + con->poison_supported = true; + else if (df_poison != umc_poison) + dev_warn(adev->dev, + "Poison setting is inconsistent in DF/UMC(%d:%d)!\n", + df_poison, umc_poison); + } +} + /* * check hardware's ras ability which will be saved in hw_supported. * if hardware does not support ras, we can skip some ras initializtion and @@ -2695,49 +2971,13 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev) if (!amdgpu_ras_asic_supported(adev)) return; - if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) { - if (amdgpu_atomfirmware_mem_ecc_supported(adev)) { - dev_info(adev->dev, "MEM ECC is active.\n"); - adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__UMC | - 1 << AMDGPU_RAS_BLOCK__DF); - } else { - dev_info(adev->dev, "MEM ECC is not presented.\n"); - } + /* query ras capability from psp */ + if (amdgpu_psp_get_ras_capability(&adev->psp)) + goto init_ras_enabled_flag; - if (amdgpu_atomfirmware_sram_ecc_supported(adev)) { - dev_info(adev->dev, "SRAM ECC is active.\n"); - if (!amdgpu_sriov_vf(adev)) - adev->ras_hw_enabled |= ~(1 << AMDGPU_RAS_BLOCK__UMC | - 1 << AMDGPU_RAS_BLOCK__DF); - else - adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__PCIE_BIF | - 1 << AMDGPU_RAS_BLOCK__SDMA | - 1 << AMDGPU_RAS_BLOCK__GFX); - - /* VCN/JPEG RAS can be supported on both bare metal and - * SRIOV environment - */ - if (amdgpu_ip_version(adev, VCN_HWIP, 0) == - IP_VERSION(2, 6, 0) || - amdgpu_ip_version(adev, VCN_HWIP, 0) == - IP_VERSION(4, 0, 0) || - amdgpu_ip_version(adev, VCN_HWIP, 0) == - IP_VERSION(4, 0, 3)) - adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__VCN | - 1 << AMDGPU_RAS_BLOCK__JPEG); - else - adev->ras_hw_enabled &= ~(1 << AMDGPU_RAS_BLOCK__VCN | - 1 << AMDGPU_RAS_BLOCK__JPEG); - - /* - * XGMI RAS is not supported if xgmi num physical nodes - * is zero - */ - if (!adev->gmc.xgmi.num_physical_nodes) - adev->ras_hw_enabled &= ~(1 << AMDGPU_RAS_BLOCK__XGMI_WAFL); - } else { - dev_info(adev->dev, "SRAM ECC is not presented.\n"); - } + /* query ras capablity from bios */ + if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) { + amdgpu_ras_query_ras_capablity_from_vbios(adev); } else { /* driver only manages a few IP blocks RAS feature * when GPU is connected cpu through XGMI */ @@ -2746,13 +2986,21 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev) 1 << AMDGPU_RAS_BLOCK__MMHUB); } + /* apply asic specific settings (vega20 only for now) */ amdgpu_ras_get_quirks(adev); + /* query poison mode from umc/df ip callback */ + amdgpu_ras_query_poison_mode(adev); + +init_ras_enabled_flag: /* hw_supported needs to be aligned with RAS block mask. */ adev->ras_hw_enabled &= AMDGPU_RAS_BLOCK_MASK; adev->ras_enabled = amdgpu_ras_enable == 0 ? 0 : adev->ras_hw_enabled & amdgpu_ras_mask; + + /* aca is disabled by default */ + adev->aca.is_enabled = false; } static void amdgpu_ras_counte_dw(struct work_struct *work) @@ -2780,39 +3028,6 @@ Out: pm_runtime_put_autosuspend(dev->dev); } -static void amdgpu_ras_query_poison_mode(struct amdgpu_device *adev) -{ - struct amdgpu_ras *con = amdgpu_ras_get_context(adev); - bool df_poison, umc_poison; - - /* poison setting is useless on SRIOV guest */ - if (amdgpu_sriov_vf(adev) || !con) - return; - - /* Init poison supported flag, the default value is false */ - if (adev->gmc.xgmi.connected_to_cpu || - adev->gmc.is_app_apu) { - /* enabled by default when GPU is connected to CPU */ - con->poison_supported = true; - } else if (adev->df.funcs && - adev->df.funcs->query_ras_poison_mode && - adev->umc.ras && - adev->umc.ras->query_ras_poison_mode) { - df_poison = - adev->df.funcs->query_ras_poison_mode(adev); - umc_poison = - adev->umc.ras->query_ras_poison_mode(adev); - - /* Only poison is set in both DF and UMC, we can support it */ - if (df_poison && umc_poison) - con->poison_supported = true; - else if (df_poison != umc_poison) - dev_warn(adev->dev, - "Poison setting is inconsistent in DF/UMC(%d:%d)!\n", - df_poison, umc_poison); - } -} - static int amdgpu_get_ras_schema(struct amdgpu_device *adev) { return amdgpu_ras_is_poison_mode_supported(adev) ? AMDGPU_RAS_ERROR__POISON : 0 | @@ -2917,12 +3132,11 @@ int amdgpu_ras_init(struct amdgpu_device *adev) goto release_con; } - amdgpu_ras_query_poison_mode(adev); - /* Packed socket_id to ras feature mask bits[31:29] */ if (adev->smuio.funcs && adev->smuio.funcs->get_socket_id) - con->features |= ((adev->smuio.funcs->get_socket_id(adev)) << 29); + con->features |= ((adev->smuio.funcs->get_socket_id(adev)) << + AMDGPU_RAS_FEATURES_SOCKETID_SHIFT); /* Get RAS schema for particular SOC */ con->schema = amdgpu_get_ras_schema(adev); @@ -3128,7 +3342,7 @@ void amdgpu_ras_suspend(struct amdgpu_device *adev) amdgpu_ras_disable_all_features(adev, 0); /* Make sure all ras objects are disabled. */ - if (con->features) + if (AMDGPU_RAS_GET_FEATURES(con->features)) amdgpu_ras_disable_all_features(adev, 1); } @@ -3142,15 +3356,29 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev) if (amdgpu_sriov_vf(adev)) return 0; - amdgpu_ras_set_mca_debug_mode(adev, false); + if (amdgpu_aca_is_enabled(adev)) { + if (amdgpu_in_reset(adev)) + r = amdgpu_aca_reset(adev); + else + r = amdgpu_aca_init(adev); + if (r) + return r; + + amdgpu_ras_set_aca_debug_mode(adev, false); + } else { + amdgpu_ras_set_mca_debug_mode(adev, false); + } list_for_each_entry_safe(node, tmp, &adev->ras_list, node) { - if (!node->ras_obj) { + obj = node->ras_obj; + if (!obj) { dev_warn(adev->dev, "Warning: abnormal ras list node.\n"); continue; } - obj = node->ras_obj; + if (!amdgpu_ras_is_supported(adev, obj->ras_comm.block)) + continue; + if (obj->ras_late_init) { r = obj->ras_late_init(adev, &obj->ras_comm); if (r) { @@ -3175,7 +3403,7 @@ int amdgpu_ras_pre_fini(struct amdgpu_device *adev) /* Need disable ras on all IPs here before ip [hw/sw]fini */ - if (con->features) + if (AMDGPU_RAS_GET_FEATURES(con->features)) amdgpu_ras_disable_all_features(adev, 0); amdgpu_ras_recovery_fini(adev); return 0; @@ -3208,10 +3436,13 @@ int amdgpu_ras_fini(struct amdgpu_device *adev) amdgpu_ras_fs_fini(adev); amdgpu_ras_interrupt_remove_all(adev); - WARN(con->features, "Feature mask is not cleared"); + if (amdgpu_aca_is_enabled(adev)) + amdgpu_aca_fini(adev); - if (con->features) - amdgpu_ras_disable_all_features(adev, 1); + WARN(AMDGPU_RAS_GET_FEATURES(con->features), "Feature mask is not cleared"); + + if (AMDGPU_RAS_GET_FEATURES(con->features)) + amdgpu_ras_disable_all_features(adev, 0); cancel_delayed_work_sync(&con->ras_counte_delay_work); @@ -3221,6 +3452,26 @@ int amdgpu_ras_fini(struct amdgpu_device *adev) return 0; } +bool amdgpu_ras_get_fed_status(struct amdgpu_device *adev) +{ + struct amdgpu_ras *ras; + + ras = amdgpu_ras_get_context(adev); + if (!ras) + return false; + + return atomic_read(&ras->fed); +} + +void amdgpu_ras_set_fed(struct amdgpu_device *adev, bool status) +{ + struct amdgpu_ras *ras; + + ras = amdgpu_ras_get_context(adev); + if (ras) + atomic_set(&ras->fed, !!status); +} + void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev) { if (atomic_cmpxchg(&amdgpu_ras_in_intr, 0, 1) == 0) { @@ -3401,6 +3652,7 @@ int amdgpu_ras_is_supported(struct amdgpu_device *adev, block == AMDGPU_RAS_BLOCK__SDMA || block == AMDGPU_RAS_BLOCK__VCN || block == AMDGPU_RAS_BLOCK__JPEG) && + (amdgpu_ras_mask & (1 << block)) && amdgpu_ras_is_poison_mode_supported(adev) && amdgpu_ras_get_ras_block(adev, block, 0)) ret = 1; @@ -3425,22 +3677,41 @@ int amdgpu_ras_set_mca_debug_mode(struct amdgpu_device *adev, bool enable) if (con) { ret = amdgpu_mca_smu_set_debug_mode(adev, enable); if (!ret) - con->is_mca_debug_mode = enable; + con->is_aca_debug_mode = enable; } return ret; } -bool amdgpu_ras_get_mca_debug_mode(struct amdgpu_device *adev) +int amdgpu_ras_set_aca_debug_mode(struct amdgpu_device *adev, bool enable) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + int ret = 0; + + if (con) { + if (amdgpu_aca_is_enabled(adev)) + ret = amdgpu_aca_smu_set_debug_mode(adev, enable); + else + ret = amdgpu_mca_smu_set_debug_mode(adev, enable); + if (!ret) + con->is_aca_debug_mode = enable; + } + + return ret; +} + +bool amdgpu_ras_get_aca_debug_mode(struct amdgpu_device *adev) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + const struct aca_smu_funcs *smu_funcs = adev->aca.smu_funcs; const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs; if (!con) return false; - if (mca_funcs && mca_funcs->mca_set_debug_mode) - return con->is_mca_debug_mode; + if ((amdgpu_aca_is_enabled(adev) && smu_funcs && smu_funcs->set_debug_mode) || + (!amdgpu_aca_is_enabled(adev) && mca_funcs && mca_funcs->mca_set_debug_mode)) + return con->is_aca_debug_mode; else return true; } @@ -3450,15 +3721,16 @@ bool amdgpu_ras_get_error_query_mode(struct amdgpu_device *adev, { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs; + const struct aca_smu_funcs *smu_funcs = adev->aca.smu_funcs; if (!con) { *error_query_mode = AMDGPU_RAS_INVALID_ERROR_QUERY; return false; } - if (mca_funcs && mca_funcs->mca_set_debug_mode) + if ((smu_funcs && smu_funcs->set_debug_mode) || (mca_funcs && mca_funcs->mca_set_debug_mode)) *error_query_mode = - (con->is_mca_debug_mode) ? AMDGPU_RAS_DIRECT_ERROR_QUERY : AMDGPU_RAS_FIRMWARE_ERROR_QUERY; + (con->is_aca_debug_mode) ? AMDGPU_RAS_DIRECT_ERROR_QUERY : AMDGPU_RAS_FIRMWARE_ERROR_QUERY; else *error_query_mode = AMDGPU_RAS_DIRECT_ERROR_QUERY; @@ -3699,8 +3971,7 @@ static int ras_err_info_cmp(void *priv, const struct list_head *a, const struct } static struct ras_err_info *amdgpu_ras_error_get_info(struct ras_err_data *err_data, - struct amdgpu_smuio_mcm_config_info *mcm_info, - struct ras_err_addr *err_addr) + struct amdgpu_smuio_mcm_config_info *mcm_info) { struct ras_err_node *err_node; @@ -3712,10 +3983,9 @@ static struct ras_err_info *amdgpu_ras_error_get_info(struct ras_err_data *err_d if (!err_node) return NULL; - memcpy(&err_node->err_info.mcm_info, mcm_info, sizeof(*mcm_info)); + INIT_LIST_HEAD(&err_node->err_info.err_addr_list); - if (err_addr) - memcpy(&err_node->err_info.err_addr, err_addr, sizeof(*err_addr)); + memcpy(&err_node->err_info.mcm_info, mcm_info, sizeof(*mcm_info)); err_data->err_list_count++; list_add_tail(&err_node->node, &err_data->err_node_list); @@ -3724,6 +3994,29 @@ static struct ras_err_info *amdgpu_ras_error_get_info(struct ras_err_data *err_d return &err_node->err_info; } +void amdgpu_ras_add_mca_err_addr(struct ras_err_info *err_info, struct ras_err_addr *err_addr) +{ + struct ras_err_addr *mca_err_addr; + + mca_err_addr = kzalloc(sizeof(*mca_err_addr), GFP_KERNEL); + if (!mca_err_addr) + return; + + INIT_LIST_HEAD(&mca_err_addr->node); + + mca_err_addr->err_status = err_addr->err_status; + mca_err_addr->err_ipid = err_addr->err_ipid; + mca_err_addr->err_addr = err_addr->err_addr; + + list_add_tail(&mca_err_addr->node, &err_info->err_addr_list); +} + +void amdgpu_ras_del_mca_err_addr(struct ras_err_info *err_info, struct ras_err_addr *mca_err_addr) +{ + list_del(&mca_err_addr->node); + kfree(mca_err_addr); +} + int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data, struct amdgpu_smuio_mcm_config_info *mcm_info, struct ras_err_addr *err_addr, u64 count) @@ -3736,10 +4029,13 @@ int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data, if (!count) return 0; - err_info = amdgpu_ras_error_get_info(err_data, mcm_info, err_addr); + err_info = amdgpu_ras_error_get_info(err_data, mcm_info); if (!err_info) return -EINVAL; + if (err_addr && err_addr->err_status) + amdgpu_ras_add_mca_err_addr(err_info, err_addr); + err_info->ue_count += count; err_data->ue_count += count; @@ -3758,7 +4054,7 @@ int amdgpu_ras_error_statistic_ce_count(struct ras_err_data *err_data, if (!count) return 0; - err_info = amdgpu_ras_error_get_info(err_data, mcm_info, err_addr); + err_info = amdgpu_ras_error_get_info(err_data, mcm_info); if (!err_info) return -EINVAL; @@ -3767,3 +4063,135 @@ int amdgpu_ras_error_statistic_ce_count(struct ras_err_data *err_data, return 0; } + +int amdgpu_ras_error_statistic_de_count(struct ras_err_data *err_data, + struct amdgpu_smuio_mcm_config_info *mcm_info, + struct ras_err_addr *err_addr, u64 count) +{ + struct ras_err_info *err_info; + + if (!err_data || !mcm_info) + return -EINVAL; + + if (!count) + return 0; + + err_info = amdgpu_ras_error_get_info(err_data, mcm_info); + if (!err_info) + return -EINVAL; + + if (err_addr && err_addr->err_status) + amdgpu_ras_add_mca_err_addr(err_info, err_addr); + + err_info->de_count += count; + err_data->de_count += count; + + return 0; +} + +#define mmMP0_SMN_C2PMSG_92 0x1609C +#define mmMP0_SMN_C2PMSG_126 0x160BE +static void amdgpu_ras_boot_time_error_reporting(struct amdgpu_device *adev, + u32 instance, u32 boot_error) +{ + u32 socket_id, aid_id, hbm_id; + u32 reg_data; + u64 reg_addr; + + socket_id = AMDGPU_RAS_GPU_ERR_SOCKET_ID(boot_error); + aid_id = AMDGPU_RAS_GPU_ERR_AID_ID(boot_error); + hbm_id = AMDGPU_RAS_GPU_ERR_HBM_ID(boot_error); + + /* The pattern for smn addressing in other SOC could be different from + * the one for aqua_vanjaram. We should revisit the code if the pattern + * is changed. In such case, replace the aqua_vanjaram implementation + * with more common helper */ + reg_addr = (mmMP0_SMN_C2PMSG_92 << 2) + + aqua_vanjaram_encode_ext_smn_addressing(instance); + + reg_data = amdgpu_device_indirect_rreg_ext(adev, reg_addr); + dev_err(adev->dev, "socket: %d, aid: %d, firmware boot failed, fw status is 0x%x\n", + socket_id, aid_id, reg_data); + + if (AMDGPU_RAS_GPU_ERR_MEM_TRAINING(boot_error)) + dev_info(adev->dev, "socket: %d, aid: %d, hbm: %d, memory training failed\n", + socket_id, aid_id, hbm_id); + + if (AMDGPU_RAS_GPU_ERR_FW_LOAD(boot_error)) + dev_info(adev->dev, "socket: %d, aid: %d, firmware load failed at boot time\n", + socket_id, aid_id); + + if (AMDGPU_RAS_GPU_ERR_WAFL_LINK_TRAINING(boot_error)) + dev_info(adev->dev, "socket: %d, aid: %d, wafl link training failed\n", + socket_id, aid_id); + + if (AMDGPU_RAS_GPU_ERR_XGMI_LINK_TRAINING(boot_error)) + dev_info(adev->dev, "socket: %d, aid: %d, xgmi link training failed\n", + socket_id, aid_id); + + if (AMDGPU_RAS_GPU_ERR_USR_CP_LINK_TRAINING(boot_error)) + dev_info(adev->dev, "socket: %d, aid: %d, usr cp link training failed\n", + socket_id, aid_id); + + if (AMDGPU_RAS_GPU_ERR_USR_DP_LINK_TRAINING(boot_error)) + dev_info(adev->dev, "socket: %d, aid: %d, usr dp link training failed\n", + socket_id, aid_id); + + if (AMDGPU_RAS_GPU_ERR_HBM_MEM_TEST(boot_error)) + dev_info(adev->dev, "socket: %d, aid: %d, hbm: %d, hbm memory test failed\n", + socket_id, aid_id, hbm_id); + + if (AMDGPU_RAS_GPU_ERR_HBM_BIST_TEST(boot_error)) + dev_info(adev->dev, "socket: %d, aid: %d, hbm: %d, hbm bist test failed\n", + socket_id, aid_id, hbm_id); +} + +static int amdgpu_ras_wait_for_boot_complete(struct amdgpu_device *adev, + u32 instance, u32 *boot_error) +{ + u32 reg_addr; + u32 reg_data; + int retry_loop; + + reg_addr = (mmMP0_SMN_C2PMSG_92 << 2) + + aqua_vanjaram_encode_ext_smn_addressing(instance); + + for (retry_loop = 0; retry_loop < AMDGPU_RAS_BOOT_STATUS_POLLING_LIMIT; retry_loop++) { + reg_data = amdgpu_device_indirect_rreg_ext(adev, reg_addr); + if ((reg_data & AMDGPU_RAS_BOOT_STATUS_MASK) == AMDGPU_RAS_BOOT_STEADY_STATUS) { + *boot_error = AMDGPU_RAS_BOOT_SUCEESS; + return 0; + } + msleep(1); + } + + /* The pattern for smn addressing in other SOC could be different from + * the one for aqua_vanjaram. We should revisit the code if the pattern + * is changed. In such case, replace the aqua_vanjaram implementation + * with more common helper */ + reg_addr = (mmMP0_SMN_C2PMSG_126 << 2) + + aqua_vanjaram_encode_ext_smn_addressing(instance); + + for (retry_loop = 0; retry_loop < AMDGPU_RAS_BOOT_STATUS_POLLING_LIMIT; retry_loop++) { + reg_data = amdgpu_device_indirect_rreg_ext(adev, reg_addr); + if (AMDGPU_RAS_GPU_ERR_BOOT_STATUS(reg_data)) { + *boot_error = reg_data; + return 0; + } + msleep(1); + } + + *boot_error = reg_data; + return -ETIME; +} + +void amdgpu_ras_query_boot_status(struct amdgpu_device *adev, u32 num_instances) +{ + u32 boot_error = 0; + u32 i; + + for (i = 0; i < num_instances; i++) { + if (amdgpu_ras_wait_for_boot_complete(adev, i, &boot_error)) + amdgpu_ras_boot_time_error_reporting(adev, i, boot_error); + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 76fb856287..e0f8ce9d84 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -29,9 +29,28 @@ #include "ta_ras_if.h" #include "amdgpu_ras_eeprom.h" #include "amdgpu_smuio.h" +#include "amdgpu_aca.h" struct amdgpu_iv_entry; +#define AMDGPU_RAS_GPU_ERR_MEM_TRAINING(x) AMDGPU_GET_REG_FIELD(x, 0, 0) +#define AMDGPU_RAS_GPU_ERR_FW_LOAD(x) AMDGPU_GET_REG_FIELD(x, 1, 1) +#define AMDGPU_RAS_GPU_ERR_WAFL_LINK_TRAINING(x) AMDGPU_GET_REG_FIELD(x, 2, 2) +#define AMDGPU_RAS_GPU_ERR_XGMI_LINK_TRAINING(x) AMDGPU_GET_REG_FIELD(x, 3, 3) +#define AMDGPU_RAS_GPU_ERR_USR_CP_LINK_TRAINING(x) AMDGPU_GET_REG_FIELD(x, 4, 4) +#define AMDGPU_RAS_GPU_ERR_USR_DP_LINK_TRAINING(x) AMDGPU_GET_REG_FIELD(x, 5, 5) +#define AMDGPU_RAS_GPU_ERR_HBM_MEM_TEST(x) AMDGPU_GET_REG_FIELD(x, 6, 6) +#define AMDGPU_RAS_GPU_ERR_HBM_BIST_TEST(x) AMDGPU_GET_REG_FIELD(x, 7, 7) +#define AMDGPU_RAS_GPU_ERR_SOCKET_ID(x) AMDGPU_GET_REG_FIELD(x, 10, 8) +#define AMDGPU_RAS_GPU_ERR_AID_ID(x) AMDGPU_GET_REG_FIELD(x, 12, 11) +#define AMDGPU_RAS_GPU_ERR_HBM_ID(x) AMDGPU_GET_REG_FIELD(x, 13, 13) +#define AMDGPU_RAS_GPU_ERR_BOOT_STATUS(x) AMDGPU_GET_REG_FIELD(x, 31, 31) + +#define AMDGPU_RAS_BOOT_STATUS_POLLING_LIMIT 1000 +#define AMDGPU_RAS_BOOT_STEADY_STATUS 0xBA +#define AMDGPU_RAS_BOOT_STATUS_MASK 0xFF +#define AMDGPU_RAS_BOOT_SUCEESS 0x80000000 + #define AMDGPU_RAS_FLAG_INIT_BY_VBIOS (0x1 << 0) /* position of instance value in sub_block_index of * ta_ras_trigger_error_input, the sub block uses lower 12 bits @@ -39,6 +58,12 @@ struct amdgpu_iv_entry; #define AMDGPU_RAS_INST_MASK 0xfffff000 #define AMDGPU_RAS_INST_SHIFT 0xc +#define AMDGPU_RAS_FEATURES_SOCKETID_SHIFT 29 +#define AMDGPU_RAS_FEATURES_SOCKETID_MASK 0xe0000000 + +/* The high three bits indicates socketid */ +#define AMDGPU_RAS_GET_FEATURES(val) ((val) & ~AMDGPU_RAS_FEATURES_SOCKETID_MASK) + enum amdgpu_ras_block { AMDGPU_RAS_BLOCK__UMC = 0, AMDGPU_RAS_BLOCK__SDMA, @@ -57,6 +82,8 @@ enum amdgpu_ras_block { AMDGPU_RAS_BLOCK__MCA, AMDGPU_RAS_BLOCK__VCN, AMDGPU_RAS_BLOCK__JPEG, + AMDGPU_RAS_BLOCK__IH, + AMDGPU_RAS_BLOCK__MPIO, AMDGPU_RAS_BLOCK__LAST }; @@ -441,10 +468,17 @@ struct amdgpu_ras { /* Indicates smu whether need update bad channel info */ bool update_channel_flag; /* Record status of smu mca debug mode */ - bool is_mca_debug_mode; + bool is_aca_debug_mode; /* Record special requirements of gpu reset caller */ uint32_t gpu_reset_flags; + + struct task_struct *page_retirement_thread; + wait_queue_head_t page_retirement_wq; + struct mutex page_retirement_lock; + atomic_t page_retirement_req_cnt; + /* Fatal error detected flag */ + atomic_t fed; }; struct ras_fs_data { @@ -453,6 +487,7 @@ struct ras_fs_data { }; struct ras_err_addr { + struct list_head node; uint64_t err_status; uint64_t err_ipid; uint64_t err_addr; @@ -462,7 +497,8 @@ struct ras_err_info { struct amdgpu_smuio_mcm_config_info mcm_info; u64 ce_count; u64 ue_count; - struct ras_err_addr err_addr; + u64 de_count; + struct list_head err_addr_list; }; struct ras_err_node { @@ -473,6 +509,7 @@ struct ras_err_node { struct ras_err_data { unsigned long ue_count; unsigned long ce_count; + unsigned long de_count; unsigned long err_addr_cnt; struct eeprom_table_record *err_addr; u32 err_list_count; @@ -529,6 +566,8 @@ struct ras_manager { struct ras_ih_data ih_data; struct ras_err_data err_data; + + struct aca_handle aca_handle; }; struct ras_badpage { @@ -548,6 +587,7 @@ struct ras_query_if { struct ras_common_if head; unsigned long ue_count; unsigned long ce_count; + unsigned long de_count; }; struct ras_inject_if { @@ -781,7 +821,8 @@ struct amdgpu_ras* amdgpu_ras_get_context(struct amdgpu_device *adev); int amdgpu_ras_set_context(struct amdgpu_device *adev, struct amdgpu_ras *ras_con); int amdgpu_ras_set_mca_debug_mode(struct amdgpu_device *adev, bool enable); -bool amdgpu_ras_get_mca_debug_mode(struct amdgpu_device *adev); +int amdgpu_ras_set_aca_debug_mode(struct amdgpu_device *adev, bool enable); +bool amdgpu_ras_get_aca_debug_mode(struct amdgpu_device *adev); bool amdgpu_ras_get_error_query_mode(struct amdgpu_device *adev, unsigned int *mode); @@ -818,5 +859,24 @@ int amdgpu_ras_error_statistic_ce_count(struct ras_err_data *err_data, int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data, struct amdgpu_smuio_mcm_config_info *mcm_info, struct ras_err_addr *err_addr, u64 count); +int amdgpu_ras_error_statistic_de_count(struct ras_err_data *err_data, + struct amdgpu_smuio_mcm_config_info *mcm_info, + struct ras_err_addr *err_addr, u64 count); +void amdgpu_ras_query_boot_status(struct amdgpu_device *adev, u32 num_instances); +int amdgpu_ras_bind_aca(struct amdgpu_device *adev, enum amdgpu_ras_block blk, + const struct aca_info *aca_info, void *data); +int amdgpu_ras_unbind_aca(struct amdgpu_device *adev, enum amdgpu_ras_block blk); + +ssize_t amdgpu_ras_aca_sysfs_read(struct device *dev, struct device_attribute *attr, + struct aca_handle *handle, char *buf, void *data); + +void amdgpu_ras_add_mca_err_addr(struct ras_err_info *err_info, + struct ras_err_addr *err_addr); + +void amdgpu_ras_del_mca_err_addr(struct ras_err_info *err_info, + struct ras_err_addr *mca_err_addr); + +void amdgpu_ras_set_fed(struct amdgpu_device *adev, bool status); +bool amdgpu_ras_get_fed_status(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index 2fde93b00c..b12808c0c3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -735,6 +735,9 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control) control->tbl_rai.rma_status = GPU_RETIRED__ECC_REACH_THRESHOLD; control->tbl_rai.health_percent = 0; } + + /* ignore the -ENOTSUPP return value */ + amdgpu_dpm_send_rma_reason(adev); } if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c index 4baa300121..147100c27c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c @@ -196,6 +196,13 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count, coredump->reset_task_info.process_name, coredump->reset_task_info.pid); + if (coredump->ring) { + drm_printf(&p, "\nRing timed out details\n"); + drm_printf(&p, "IP Type: %d Ring Name: %s\n", + coredump->ring->funcs->type, + coredump->ring->name); + } + if (coredump->reset_vram_lost) drm_printf(&p, "VRAM is lost due to GPU reset!\n"); if (coredump->adev->reset_info.num_regs) { @@ -220,6 +227,8 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, { struct amdgpu_coredump_info *coredump; struct drm_device *dev = adev_to_drm(adev); + struct amdgpu_job *job = reset_context->job; + struct drm_sched_job *s_job; coredump = kzalloc(sizeof(*coredump), GFP_NOWAIT); @@ -230,8 +239,21 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, coredump->reset_vram_lost = vram_lost; - if (reset_context->job && reset_context->job->vm) - coredump->reset_task_info = reset_context->job->vm->task_info; + if (reset_context->job && reset_context->job->vm) { + struct amdgpu_task_info *ti; + struct amdgpu_vm *vm = reset_context->job->vm; + + ti = amdgpu_vm_get_task_info_vm(vm); + if (ti) { + coredump->reset_task_info = *ti; + amdgpu_vm_put_task_info(ti); + } + } + + if (job) { + s_job = &job->base; + coredump->ring = to_amdgpu_ring(s_job->sched); + } coredump->adev = adev; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h index 19899f6b9b..60522963aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h @@ -97,6 +97,7 @@ struct amdgpu_coredump_info { struct amdgpu_task_info reset_task_info; struct timespec64 reset_time; bool reset_vram_lost; + struct amdgpu_ring *ring; }; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index fe1a61eb6e..582053f1cd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -209,8 +209,7 @@ struct amdgpu_ring_funcs { void (*insert_end)(struct amdgpu_ring *ring); /* pad the indirect buffer to the necessary number of dw */ void (*pad_ib)(struct amdgpu_ring *ring, struct amdgpu_ib *ib); - unsigned (*init_cond_exec)(struct amdgpu_ring *ring); - void (*patch_cond_exec)(struct amdgpu_ring *ring, unsigned offset); + unsigned (*init_cond_exec)(struct amdgpu_ring *ring, uint64_t addr); /* note usage for clock and power gating */ void (*begin_use)(struct amdgpu_ring *ring); void (*end_use)(struct amdgpu_ring *ring); @@ -286,6 +285,9 @@ struct amdgpu_ring { unsigned cond_exe_offs; u64 cond_exe_gpu_addr; volatile u32 *cond_exe_cpu_addr; + unsigned int set_q_mode_offs; + volatile u32 *set_q_mode_ptr; + u64 set_q_mode_token; unsigned vm_hub; unsigned vm_inv_eng; struct dma_fence *vmid_wait; @@ -327,8 +329,7 @@ struct amdgpu_ring { #define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m)) #define amdgpu_ring_emit_frame_cntl(r, b, s) (r)->funcs->emit_frame_cntl((r), (b), (s)) #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib))) -#define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r)) -#define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o)) +#define amdgpu_ring_init_cond_exec(r, a) (r)->funcs->init_cond_exec((r), (a)) #define amdgpu_ring_preempt_ib(r) (r)->funcs->preempt_ib(r) #define amdgpu_ring_patch_cntl(r, o) ((r)->funcs->patch_cntl((r), (o))) #define amdgpu_ring_patch_ce(r, o) ((r)->funcs->patch_ce((r), (o))) @@ -411,6 +412,30 @@ static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring, ring->count_dw -= count_dw; } +/** + * amdgpu_ring_patch_cond_exec - patch dw count of conditional execute + * @ring: amdgpu_ring structure + * @offset: offset returned by amdgpu_ring_init_cond_exec + * + * Calculate the dw count and patch it into a cond_exec command. + */ +static inline void amdgpu_ring_patch_cond_exec(struct amdgpu_ring *ring, + unsigned int offset) +{ + unsigned cur; + + if (!ring->funcs->init_cond_exec) + return; + + WARN_ON(offset > ring->buf_mask); + WARN_ON(ring->ring[offset] != 0); + + cur = (ring->wptr - 1) & ring->buf_mask; + if (cur < offset) + cur += ring->ring_size >> 2; + ring->ring[offset] = cur - offset; +} + #define amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset) \ (ring->is_mes_queue && ring->mes_ctx ? \ (ring->mes_ctx->meta_data_gpu_addr + offset) : 0) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c index e1ee1c7117..d234b7ccfa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c @@ -159,9 +159,7 @@ int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring, mux->ring_entry_size = entry_size; mux->s_resubmit = false; - amdgpu_mux_chunk_slab = kmem_cache_create("amdgpu_mux_chunk", - sizeof(struct amdgpu_mux_chunk), 0, - SLAB_HWCACHE_ALIGN, NULL); + amdgpu_mux_chunk_slab = KMEM_CACHE(amdgpu_mux_chunk, SLAB_HWCACHE_ALIGN); if (!amdgpu_mux_chunk_slab) { DRM_ERROR("create amdgpu_mux_chunk cache failed\n"); return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c index 2c3675d916..db5791e1a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c @@ -241,7 +241,7 @@ void amdgpu_gfx_rlc_setup_cp_table(struct amdgpu_device *adev) table_size = le32_to_cpu(hdr->jt_size); } - for (i = 0; i < table_size; i ++) { + for (i = 0; i < table_size; i++) { dst_ptr[bo_offset + i] = cpu_to_le32(le32_to_cpu(fw_data[table_offset + i])); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h index b591d33af2..5a17e0ff2a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h @@ -169,7 +169,7 @@ struct amdgpu_rlc_funcs { void (*stop)(struct amdgpu_device *adev); void (*reset)(struct amdgpu_device *adev); void (*start)(struct amdgpu_device *adev); - void (*update_spm_vmid)(struct amdgpu_device *adev, unsigned vmid); + void (*update_spm_vmid)(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned vmid); bool (*is_rlcg_access_range)(struct amdgpu_device *adev, uint32_t reg); }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c index 7a6a672754..e22cb2b5cd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c @@ -36,13 +36,24 @@ */ /** + * amdgpu_seq64_get_va_base - Get the seq64 va base address + * + * @adev: amdgpu_device pointer + * + * Returns: + * va base address on success + */ +static inline u64 amdgpu_seq64_get_va_base(struct amdgpu_device *adev) +{ + return AMDGPU_VA_RESERVED_SEQ64_START(adev); +} + +/** * amdgpu_seq64_map - Map the seq64 memory to VM * * @adev: amdgpu_device pointer * @vm: vm pointer * @bo_va: bo_va pointer - * @seq64_addr: seq64 vaddr start address - * @size: seq64 pool size * * Map the seq64 memory to the given VM. * @@ -50,11 +61,11 @@ * 0 on success or a negative error code on failure */ int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm, - struct amdgpu_bo_va **bo_va, u64 seq64_addr, - uint32_t size) + struct amdgpu_bo_va **bo_va) { struct amdgpu_bo *bo; struct drm_exec exec; + u64 seq64_addr; int r; bo = adev->seq64.sbo; @@ -77,9 +88,9 @@ int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm, goto error; } - r = amdgpu_vm_bo_map(adev, *bo_va, seq64_addr, 0, size, - AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | - AMDGPU_PTE_EXECUTABLE); + seq64_addr = amdgpu_seq64_get_va_base(adev); + r = amdgpu_vm_bo_map(adev, *bo_va, seq64_addr, 0, AMDGPU_VA_RESERVED_SEQ64_SIZE, + AMDGPU_PTE_READABLE); if (r) { DRM_ERROR("failed to do bo_map on userq sem, err=%d\n", r); amdgpu_vm_bo_del(adev, *bo_va); @@ -144,31 +155,25 @@ error: * amdgpu_seq64_alloc - Allocate a 64 bit memory * * @adev: amdgpu_device pointer - * @gpu_addr: allocated gpu VA start address - * @cpu_addr: allocated cpu VA start address + * @va: VA to access the seq in process address space + * @cpu_addr: CPU address to access the seq * * Alloc a 64 bit memory from seq64 pool. * * Returns: * 0 on success or a negative error code on failure */ -int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *gpu_addr, - u64 **cpu_addr) +int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *va, u64 **cpu_addr) { unsigned long bit_pos; - u32 offset; bit_pos = find_first_zero_bit(adev->seq64.used, adev->seq64.num_sem); + if (bit_pos >= adev->seq64.num_sem) + return -ENOSPC; - if (bit_pos < adev->seq64.num_sem) { - __set_bit(bit_pos, adev->seq64.used); - offset = bit_pos << 6; /* convert to qw offset */ - } else { - return -EINVAL; - } - - *gpu_addr = offset + AMDGPU_SEQ64_VADDR_START; - *cpu_addr = offset + adev->seq64.cpu_base_addr; + __set_bit(bit_pos, adev->seq64.used); + *va = bit_pos * sizeof(u64) + amdgpu_seq64_get_va_base(adev); + *cpu_addr = bit_pos + adev->seq64.cpu_base_addr; return 0; } @@ -177,20 +182,17 @@ int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *gpu_addr, * amdgpu_seq64_free - Free the given 64 bit memory * * @adev: amdgpu_device pointer - * @gpu_addr: gpu start address to be freed + * @va: gpu start address to be freed * * Free the given 64 bit memory from seq64 pool. - * */ -void amdgpu_seq64_free(struct amdgpu_device *adev, u64 gpu_addr) +void amdgpu_seq64_free(struct amdgpu_device *adev, u64 va) { - u32 offset; - - offset = gpu_addr - AMDGPU_SEQ64_VADDR_START; + unsigned long bit_pos; - offset >>= 6; - if (offset < adev->seq64.num_sem) - __clear_bit(offset, adev->seq64.used); + bit_pos = (va - amdgpu_seq64_get_va_base(adev)) / sizeof(u64); + if (bit_pos < adev->seq64.num_sem) + __clear_bit(bit_pos, adev->seq64.used); } /** @@ -229,7 +231,7 @@ int amdgpu_seq64_init(struct amdgpu_device *adev) * AMDGPU_MAX_SEQ64_SLOTS * sizeof(u64) * 8 = AMDGPU_MAX_SEQ64_SLOTS * 64bit slots */ - r = amdgpu_bo_create_kernel(adev, AMDGPU_SEQ64_SIZE, + r = amdgpu_bo_create_kernel(adev, AMDGPU_VA_RESERVED_SEQ64_SIZE, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, &adev->seq64.sbo, NULL, (void **)&adev->seq64.cpu_base_addr); @@ -238,7 +240,7 @@ int amdgpu_seq64_init(struct amdgpu_device *adev) return r; } - memset(adev->seq64.cpu_base_addr, 0, AMDGPU_SEQ64_SIZE); + memset(adev->seq64.cpu_base_addr, 0, AMDGPU_VA_RESERVED_SEQ64_SIZE); adev->seq64.num_sem = AMDGPU_MAX_SEQ64_SLOTS; memset(&adev->seq64.used, 0, sizeof(adev->seq64.used)); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h index 2196e72be5..4203b2ab31 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h @@ -25,10 +25,9 @@ #ifndef __AMDGPU_SEQ64_H__ #define __AMDGPU_SEQ64_H__ -#define AMDGPU_SEQ64_SIZE (2ULL << 20) -#define AMDGPU_MAX_SEQ64_SLOTS (AMDGPU_SEQ64_SIZE / (sizeof(u64) * 8)) -#define AMDGPU_SEQ64_VADDR_OFFSET 0x50000 -#define AMDGPU_SEQ64_VADDR_START (AMDGPU_VA_RESERVED_SIZE + AMDGPU_SEQ64_VADDR_OFFSET) +#include "amdgpu_vm.h" + +#define AMDGPU_MAX_SEQ64_SLOTS (AMDGPU_VA_RESERVED_SEQ64_SIZE / sizeof(u64)) struct amdgpu_seq64 { struct amdgpu_bo *sbo; @@ -42,7 +41,7 @@ int amdgpu_seq64_init(struct amdgpu_device *adev); int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *gpu_addr, u64 **cpu_addr); void amdgpu_seq64_free(struct amdgpu_device *adev, u64 gpu_addr); int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm, - struct amdgpu_bo_va **bo_va, u64 seq64_addr, uint32_t size); + struct amdgpu_bo_va **bo_va); void amdgpu_seq64_unmap(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index 1b013a44ca..bdf1ef825d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -441,9 +441,7 @@ void amdgpu_sync_free(struct amdgpu_sync *sync) */ int amdgpu_sync_init(void) { - amdgpu_sync_slab = kmem_cache_create( - "amdgpu_sync", sizeof(struct amdgpu_sync_entry), 0, - SLAB_HWCACHE_ALIGN, NULL); + amdgpu_sync_slab = KMEM_CACHE(amdgpu_sync_entry, SLAB_HWCACHE_ALIGN); if (!amdgpu_sync_slab) return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 43ab165df4..29c197c000 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -102,23 +102,19 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, /* Don't handle scatter gather BOs */ if (bo->type == ttm_bo_type_sg) { placement->num_placement = 0; - placement->num_busy_placement = 0; return; } /* Object isn't an AMDGPU object so ignore */ if (!amdgpu_bo_is_amdgpu_bo(bo)) { placement->placement = &placements; - placement->busy_placement = &placements; placement->num_placement = 1; - placement->num_busy_placement = 1; return; } abo = ttm_to_amdgpu_bo(bo); if (abo->flags & AMDGPU_GEM_CREATE_DISCARDABLE) { placement->num_placement = 0; - placement->num_busy_placement = 0; return; } @@ -128,13 +124,13 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, case AMDGPU_PL_OA: case AMDGPU_PL_DOORBELL: placement->num_placement = 0; - placement->num_busy_placement = 0; return; case TTM_PL_VRAM: if (!adev->mman.buffer_funcs_enabled) { /* Move to system memory */ amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU); + } else if (!amdgpu_gmc_vram_full_visible(&adev->gmc) && !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) && amdgpu_res_cpu_visible(adev, bo->resource)) { @@ -149,8 +145,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, AMDGPU_GEM_DOMAIN_CPU); abo->placements[0].fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT; abo->placements[0].lpfn = 0; - abo->placement.busy_placement = &abo->placements[1]; - abo->placement.num_busy_placement = 1; + abo->placements[0].flags |= TTM_PL_FLAG_DESIRED; } else { /* Move to GTT memory */ amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT | @@ -982,8 +977,6 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo) /* allocate GART space */ placement.num_placement = 1; placement.placement = &placements; - placement.num_busy_placement = 1; - placement.busy_placement = &placements; placements.fpfn = 0; placements.lpfn = adev->gmc.gart_size >> PAGE_SHIFT; placements.mem_type = TTM_PL_TT; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index 3e12763e47..0867fd9e15 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -556,6 +556,8 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type) default: if (!load_type) return AMDGPU_FW_LOAD_DIRECT; + else if (load_type == 3) + return AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO; else return AMDGPU_FW_LOAD_PSP; } @@ -678,6 +680,8 @@ const char *amdgpu_ucode_name(enum AMDGPU_UCODE_ID ucode_id) return "UMSCH_MM_DATA"; case AMDGPU_UCODE_ID_UMSCH_MM_CMD_BUFFER: return "UMSCH_MM_CMD_BUFFER"; + case AMDGPU_UCODE_ID_JPEG_RAM: + return "JPEG"; default: return "UNKNOWN UCODE"; } @@ -1060,7 +1064,8 @@ static int amdgpu_ucode_patch_jt(struct amdgpu_firmware_info *ucode, int amdgpu_ucode_create_bo(struct amdgpu_device *adev) { - if (adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT) { + if ((adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT) && + (adev->firmware.load_type != AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)) { amdgpu_bo_create_kernel(adev, adev->firmware.fw_size, PAGE_SIZE, (amdgpu_sriov_vf(adev) || adev->debug_use_vram_fw_buf) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index 4244a13f9f..6194457600 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -511,6 +511,7 @@ enum AMDGPU_UCODE_ID { AMDGPU_UCODE_ID_UMSCH_MM_DATA, AMDGPU_UCODE_ID_UMSCH_MM_CMD_BUFFER, AMDGPU_UCODE_ID_P2S_TABLE, + AMDGPU_UCODE_ID_JPEG_RAM, AMDGPU_UCODE_ID_MAXIMUM, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index d65e21914d..20436f8185 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -23,6 +23,7 @@ #include "amdgpu.h" #include "umc_v6_7.h" +#define MAX_UMC_POISON_POLLING_TIME_SYNC 20 //ms static int amdgpu_umc_convert_error_address(struct amdgpu_device *adev, struct ras_err_data *err_data, uint64_t err_addr, @@ -85,18 +86,21 @@ out_fini_err_data: return ret; } -static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev, - void *ras_error_status, - struct amdgpu_iv_entry *entry, - bool reset) +static void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev, + void *ras_error_status) { struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + unsigned int error_query_mode; int ret = 0; + unsigned long err_count; - kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); + amdgpu_ras_get_error_query_mode(adev, &error_query_mode); + + mutex_lock(&con->page_retirement_lock); ret = amdgpu_dpm_get_ecc_info(adev, (void *)&(con->umc_ecc)); - if (ret == -EOPNOTSUPP) { + if (ret == -EOPNOTSUPP && + error_query_mode == AMDGPU_RAS_DIRECT_ERROR_QUERY) { if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops && adev->umc.ras->ras_block.hw_ops->query_ras_error_count) adev->umc.ras->ras_block.hw_ops->query_ras_error_count(adev, ras_error_status); @@ -120,7 +124,8 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev, */ adev->umc.ras->ras_block.hw_ops->query_ras_error_address(adev, ras_error_status); } - } else if (!ret) { + } else if (error_query_mode == AMDGPU_RAS_FIRMWARE_ERROR_QUERY || + (!ret && error_query_mode == AMDGPU_RAS_DIRECT_ERROR_QUERY)) { if (adev->umc.ras && adev->umc.ras->ecc_info_query_ras_error_count) adev->umc.ras->ecc_info_query_ras_error_count(adev, ras_error_status); @@ -147,16 +152,13 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev, } /* only uncorrectable error needs gpu reset */ - if (err_data->ue_count) { - dev_info(adev->dev, "%ld uncorrectable hardware errors " - "detected in UMC block\n", - err_data->ue_count); - + if (err_data->ue_count || err_data->de_count) { + err_count = err_data->ue_count + err_data->de_count; if ((amdgpu_bad_page_threshold != 0) && err_data->err_addr_cnt) { amdgpu_ras_add_bad_pages(adev, err_data->err_addr, err_data->err_addr_cnt); - amdgpu_ras_save_bad_pages(adev, &(err_data->ue_count)); + amdgpu_ras_save_bad_pages(adev, &err_count); amdgpu_dpm_send_hbm_bad_pages_num(adev, con->eeprom_control.ras_num_recs); @@ -165,20 +167,87 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev, con->update_channel_flag = false; } } - - if (reset) { - /* use mode-2 reset for poison consumption */ - if (!entry) - con->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE2_RESET; - amdgpu_ras_reset_gpu(adev); - } } kfree(err_data->err_addr); + + mutex_unlock(&con->page_retirement_lock); +} + +static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev, + void *ras_error_status, + struct amdgpu_iv_entry *entry, + bool reset) +{ + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + + kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); + amdgpu_umc_handle_bad_pages(adev, ras_error_status); + + if (err_data->ue_count && reset) { + /* use mode-2 reset for poison consumption */ + if (!entry) + con->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE2_RESET; + amdgpu_ras_reset_gpu(adev); + } + return AMDGPU_RAS_SUCCESS; } -int amdgpu_umc_poison_handler(struct amdgpu_device *adev, bool reset) +int amdgpu_umc_bad_page_polling_timeout(struct amdgpu_device *adev, + bool reset, uint32_t timeout_ms) +{ + struct ras_err_data err_data; + struct ras_common_if head = { + .block = AMDGPU_RAS_BLOCK__UMC, + }; + struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head); + uint32_t timeout = timeout_ms; + + memset(&err_data, 0, sizeof(err_data)); + amdgpu_ras_error_data_init(&err_data); + + do { + + amdgpu_umc_handle_bad_pages(adev, &err_data); + + if (timeout && !err_data.de_count) { + msleep(1); + timeout--; + } + + } while (timeout && !err_data.de_count); + + if (!timeout) + dev_warn(adev->dev, "Can't find bad pages\n"); + + if (err_data.de_count) + dev_info(adev->dev, "%ld new deferred hardware errors detected\n", err_data.de_count); + + if (obj) { + obj->err_data.ue_count += err_data.ue_count; + obj->err_data.ce_count += err_data.ce_count; + obj->err_data.de_count += err_data.de_count; + } + + amdgpu_ras_error_data_fini(&err_data); + + kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); + + if (reset) { + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + + /* use mode-2 reset for poison consumption */ + con->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE2_RESET; + amdgpu_ras_reset_gpu(adev); + } + + return 0; +} + +int amdgpu_umc_poison_handler(struct amdgpu_device *adev, + enum amdgpu_ras_block block, bool reset) { int ret = AMDGPU_RAS_SUCCESS; @@ -195,27 +264,41 @@ int amdgpu_umc_poison_handler(struct amdgpu_device *adev, bool reset) } if (!amdgpu_sriov_vf(adev)) { - struct ras_err_data err_data; - struct ras_common_if head = { - .block = AMDGPU_RAS_BLOCK__UMC, - }; - struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head); + if (amdgpu_ip_version(adev, UMC_HWIP, 0) < IP_VERSION(12, 0, 0)) { + struct ras_err_data err_data; + struct ras_common_if head = { + .block = AMDGPU_RAS_BLOCK__UMC, + }; + struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head); + + ret = amdgpu_ras_error_data_init(&err_data); + if (ret) + return ret; - ret = amdgpu_ras_error_data_init(&err_data); - if (ret) - return ret; + ret = amdgpu_umc_do_page_retirement(adev, &err_data, NULL, reset); - ret = amdgpu_umc_do_page_retirement(adev, &err_data, NULL, reset); + if (ret == AMDGPU_RAS_SUCCESS && obj) { + obj->err_data.ue_count += err_data.ue_count; + obj->err_data.ce_count += err_data.ce_count; + obj->err_data.de_count += err_data.de_count; + } - if (ret == AMDGPU_RAS_SUCCESS && obj) { - obj->err_data.ue_count += err_data.ue_count; - obj->err_data.ce_count += err_data.ce_count; - } + amdgpu_ras_error_data_fini(&err_data); + } else { + if (reset) { + amdgpu_umc_bad_page_polling_timeout(adev, + reset, MAX_UMC_POISON_POLLING_TIME_SYNC); + } else { + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); - amdgpu_ras_error_data_fini(&err_data); + atomic_inc(&con->page_retirement_req_cnt); + + wake_up(&con->page_retirement_wq); + } + } } else { if (adev->virt.ops && adev->virt.ops->ras_poison_handler) - adev->virt.ops->ras_poison_handler(adev); + adev->virt.ops->ras_poison_handler(adev, block); else dev_warn(adev->dev, "No ras_poison_handler interface in SRIOV!\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index 417a6726c7..26d2ae498d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -21,7 +21,7 @@ #ifndef __AMDGPU_UMC_H__ #define __AMDGPU_UMC_H__ #include "amdgpu_ras.h" - +#include "amdgpu_mca.h" /* * (addr / 256) * 4096, the higher 26 bits in ErrorAddr * is the index of 4KB block @@ -64,6 +64,8 @@ struct amdgpu_umc_ras { void *ras_error_status); void (*ecc_info_query_ras_error_address)(struct amdgpu_device *adev, void *ras_error_status); + bool (*check_ecc_err_status)(struct amdgpu_device *adev, + enum amdgpu_mca_error_type type, void *ras_error_status); /* support different eeprom table version for different asic */ void (*set_eeprom_table_version)(struct amdgpu_ras_eeprom_table_header *hdr); }; @@ -100,7 +102,8 @@ struct amdgpu_umc { int amdgpu_umc_ras_sw_init(struct amdgpu_device *adev); int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block); -int amdgpu_umc_poison_handler(struct amdgpu_device *adev, bool reset); +int amdgpu_umc_poison_handler(struct amdgpu_device *adev, + enum amdgpu_ras_block block, bool reset); int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry); @@ -118,4 +121,7 @@ int amdgpu_umc_page_retirement_mca(struct amdgpu_device *adev, int amdgpu_umc_loop_channels(struct amdgpu_device *adev, umc_func func, void *data); + +int amdgpu_umc_bad_page_polling_timeout(struct amdgpu_device *adev, + bool reset, uint32_t timeout_ms); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umr.h index 107f9bb0e2..5b27fc41ff 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umr.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umr.h @@ -69,12 +69,12 @@ struct amdgpu_debugfs_gprwave_data { }; enum AMDGPU_DEBUGFS_REGS2_CMDS { - AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE=0, + AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE = 0, AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE_V2, }; enum AMDGPU_DEBUGFS_GPRWAVE_CMDS { - AMDGPU_DEBUGFS_GPRWAVE_CMD_SET_STATE=0, + AMDGPU_DEBUGFS_GPRWAVE_CMD_SET_STATE = 0, }; //reg2 interface diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c index 1b5ef32108..f7c73533e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c @@ -189,10 +189,13 @@ static void setup_vpe_queue(struct amdgpu_device *adev, mqd->rptr_val = 0; mqd->unmapped = 1; + if (adev->vpe.collaborate_mode) + memcpy(++mqd, test->mqd_data_cpu_addr, sizeof(struct MQD_INFO)); + qinfo->mqd_addr = test->mqd_data_gpu_addr; qinfo->csa_addr = test->ctx_data_gpu_addr + offsetof(struct umsch_mm_test_ctx_data, vpe_ctx_csa); - qinfo->doorbell_offset_0 = (adev->doorbell_index.vpe_ring + 1) << 1; + qinfo->doorbell_offset_0 = 0; qinfo->doorbell_offset_1 = 0; } @@ -287,7 +290,10 @@ static int submit_vpe_queue(struct amdgpu_device *adev, struct umsch_mm_test *te ring[5] = 0; mqd->wptr_val = (6 << 2); - // WDOORBELL32(adev->umsch_mm.agdb_index[CONTEXT_PRIORITY_LEVEL_NORMAL], mqd->wptr_val); + if (adev->vpe.collaborate_mode) + (++mqd)->wptr_val = (6 << 2); + + WDOORBELL32(adev->umsch_mm.agdb_index[CONTEXT_PRIORITY_LEVEL_NORMAL], mqd->wptr_val); for (i = 0; i < adev->usec_timeout; i++) { if (*fence == test_pattern) @@ -358,7 +364,7 @@ static int setup_umsch_mm_test(struct amdgpu_device *adev, memset(test->ring_data_cpu_addr, 0, sizeof(struct umsch_mm_test_ring_data)); - test->ring_data_gpu_addr = AMDGPU_VA_RESERVED_SIZE; + test->ring_data_gpu_addr = AMDGPU_VA_RESERVED_BOTTOM; r = map_ring_data(adev, test->vm, test->ring_data_obj, &test->bo_va, test->ring_data_gpu_addr, sizeof(struct umsch_mm_test_ring_data)); if (r) @@ -571,6 +577,7 @@ int amdgpu_umsch_mm_init_microcode(struct amdgpu_umsch_mm *umsch) switch (amdgpu_ip_version(adev, VCN_HWIP, 0)) { case IP_VERSION(4, 0, 5): + case IP_VERSION(4, 0, 6): fw_name = "amdgpu/umsch_mm_4_0_0.bin"; break; default: @@ -750,6 +757,7 @@ static int umsch_mm_early_init(void *handle) switch (amdgpu_ip_version(adev, VCN_HWIP, 0)) { case IP_VERSION(4, 0, 5): + case IP_VERSION(4, 0, 6): umsch_mm_v4_0_set_funcs(&adev->umsch_mm); break; default: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h index 8258a43a62..5014b5af95 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h @@ -33,13 +33,6 @@ enum UMSCH_SWIP_ENGINE_TYPE { UMSCH_SWIP_ENGINE_TYPE_MAX }; -enum UMSCH_SWIP_AFFINITY_TYPE { - UMSCH_SWIP_AFFINITY_TYPE_ANY = 0, - UMSCH_SWIP_AFFINITY_TYPE_VCN0 = 1, - UMSCH_SWIP_AFFINITY_TYPE_VCN1 = 2, - UMSCH_SWIP_AFFINITY_TYPE_MAX -}; - enum UMSCH_CONTEXT_PRIORITY_LEVEL { CONTEXT_PRIORITY_LEVEL_IDLE = 0, CONTEXT_PRIORITY_LEVEL_NORMAL = 1, @@ -51,13 +44,15 @@ enum UMSCH_CONTEXT_PRIORITY_LEVEL { struct umsch_mm_set_resource_input { uint32_t vmid_mask_mm_vcn; uint32_t vmid_mask_mm_vpe; + uint32_t collaboration_mask_vpe; uint32_t logging_vmid; uint32_t engine_mask; union { struct { uint32_t disable_reset : 1; uint32_t disable_umsch_mm_log : 1; - uint32_t reserved : 30; + uint32_t use_rs64mem_for_proc_ctx_csa : 1; + uint32_t reserved : 29; }; uint32_t uint32_all; }; @@ -78,15 +73,18 @@ struct umsch_mm_add_queue_input { uint32_t doorbell_offset_1; enum UMSCH_SWIP_ENGINE_TYPE engine_type; uint32_t affinity; - enum UMSCH_SWIP_AFFINITY_TYPE affinity_type; uint64_t mqd_addr; uint64_t h_context; uint64_t h_queue; uint32_t vm_context_cntl; + uint32_t process_csa_array_index; + uint32_t context_csa_array_index; + struct { uint32_t is_context_suspended : 1; - uint32_t reserved : 31; + uint32_t collaboration_mode : 1; + uint32_t reserved : 30; }; }; @@ -94,6 +92,7 @@ struct umsch_mm_remove_queue_input { uint32_t doorbell_offset_0; uint32_t doorbell_offset_1; uint64_t context_csa_addr; + uint32_t context_csa_array_index; }; struct MQD_INFO { @@ -103,6 +102,7 @@ struct MQD_INFO { uint32_t wptr_val; uint32_t rptr_val; uint32_t unmapped; + uint32_t vmid; }; struct amdgpu_umsch_mm; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index f4963330c7..9c514a606a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -59,6 +59,9 @@ #define FIRMWARE_VCN4_0_3 "amdgpu/vcn_4_0_3.bin" #define FIRMWARE_VCN4_0_4 "amdgpu/vcn_4_0_4.bin" #define FIRMWARE_VCN4_0_5 "amdgpu/vcn_4_0_5.bin" +#define FIRMWARE_VCN4_0_6 "amdgpu/vcn_4_0_6.bin" +#define FIRMWARE_VCN4_0_6_1 "amdgpu/vcn_4_0_6_1.bin" +#define FIRMWARE_VCN5_0_0 "amdgpu/vcn_5_0_0.bin" MODULE_FIRMWARE(FIRMWARE_RAVEN); MODULE_FIRMWARE(FIRMWARE_PICASSO); @@ -82,6 +85,9 @@ MODULE_FIRMWARE(FIRMWARE_VCN4_0_2); MODULE_FIRMWARE(FIRMWARE_VCN4_0_3); MODULE_FIRMWARE(FIRMWARE_VCN4_0_4); MODULE_FIRMWARE(FIRMWARE_VCN4_0_5); +MODULE_FIRMWARE(FIRMWARE_VCN4_0_6); +MODULE_FIRMWARE(FIRMWARE_VCN4_0_6_1); +MODULE_FIRMWARE(FIRMWARE_VCN5_0_0); static void amdgpu_vcn_idle_work_handler(struct work_struct *work); @@ -89,14 +95,22 @@ int amdgpu_vcn_early_init(struct amdgpu_device *adev) { char ucode_prefix[30]; char fw_name[40]; - int r; + int r, i; - amdgpu_ucode_ip_version_decode(adev, UVD_HWIP, ucode_prefix, sizeof(ucode_prefix)); - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s.bin", ucode_prefix); - r = amdgpu_ucode_request(adev, &adev->vcn.fw, fw_name); - if (r) - amdgpu_ucode_release(&adev->vcn.fw); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + amdgpu_ucode_ip_version_decode(adev, UVD_HWIP, ucode_prefix, sizeof(ucode_prefix)); + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s.bin", ucode_prefix); + if (amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(4, 0, 6) && + i == 1) { + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_%d.bin", ucode_prefix, i); + } + r = amdgpu_ucode_request(adev, &adev->vcn.fw[i], fw_name); + if (r) { + amdgpu_ucode_release(&adev->vcn.fw[i]); + return r; + } + } return r; } @@ -137,7 +151,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) } } - hdr = (const struct common_firmware_header *)adev->vcn.fw->data; + hdr = (const struct common_firmware_header *)adev->vcn.fw[0]->data; adev->vcn.fw_version = le32_to_cpu(hdr->ucode_version); /* Bit 20-23, it is encode major and non-zero for new naming convention. @@ -252,9 +266,10 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) for (i = 0; i < adev->vcn.num_enc_rings; ++i) amdgpu_ring_fini(&adev->vcn.inst[j].ring_enc[i]); + + amdgpu_ucode_release(&adev->vcn.fw[j]); } - amdgpu_ucode_release(&adev->vcn.fw); mutex_destroy(&adev->vcn.vcn1_jpeg1_workaround); mutex_destroy(&adev->vcn.vcn_pg_lock); @@ -350,11 +365,12 @@ int amdgpu_vcn_resume(struct amdgpu_device *adev) const struct common_firmware_header *hdr; unsigned int offset; - hdr = (const struct common_firmware_header *)adev->vcn.fw->data; + hdr = (const struct common_firmware_header *)adev->vcn.fw[i]->data; if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { offset = le32_to_cpu(hdr->ucode_array_offset_bytes); if (drm_dev_enter(adev_to_drm(adev), &idx)) { - memcpy_toio(adev->vcn.inst[i].cpu_addr, adev->vcn.fw->data + offset, + memcpy_toio(adev->vcn.inst[i].cpu_addr, + adev->vcn.fw[i]->data + offset, le32_to_cpu(hdr->ucode_size_bytes)); drm_dev_exit(idx); } @@ -1039,11 +1055,11 @@ void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev) if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { const struct common_firmware_header *hdr; - hdr = (const struct common_firmware_header *)adev->vcn.fw->data; - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { if (adev->vcn.harvest_config & (1 << i)) continue; + + hdr = (const struct common_firmware_header *)adev->vcn.fw[i]->data; /* currently only support 2 FW instances */ if (i >= 2) { dev_info(adev->dev, "More then 2 VCN FW instances!\n"); @@ -1051,7 +1067,7 @@ void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev) } idx = AMDGPU_UCODE_ID_VCN + i; adev->firmware.ucode[idx].ucode_id = idx; - adev->firmware.ucode[idx].fw = adev->vcn.fw; + adev->firmware.ucode[idx].fw = adev->vcn.fw[i]; adev->firmware.fw_size += ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); @@ -1189,7 +1205,7 @@ int amdgpu_vcn_process_poison_irq(struct amdgpu_device *adev, amdgpu_ras_interrupt_dispatch(adev, &ih_data); } else { if (adev->virt.ops && adev->virt.ops->ras_poison_handler) - adev->virt.ops->ras_poison_handler(adev); + adev->virt.ops->ras_poison_handler(adev, ras_if->block); else dev_warn(adev->dev, "No ras_poison_handler interface in SRIOV for VCN!\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 514c98ea14..a418393d89 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -160,6 +160,48 @@ } \ } while (0) +#define SOC24_DPG_MODE_OFFSET(ip, inst_idx, reg) \ + ({ \ + uint32_t internal_reg_offset, addr; \ + bool video_range, aon_range; \ + \ + addr = (adev->reg_offset[ip##_HWIP][inst_idx][reg##_BASE_IDX] + reg); \ + addr <<= 2; \ + video_range = ((((0xFFFFF & addr) >= (VCN_VID_SOC_ADDRESS)) && \ + ((0xFFFFF & addr) < ((VCN_VID_SOC_ADDRESS + 0x2600))))); \ + aon_range = ((((0xFFFFF & addr) >= (VCN_AON_SOC_ADDRESS)) && \ + ((0xFFFFF & addr) < ((VCN_AON_SOC_ADDRESS + 0x600))))); \ + if (video_range) \ + internal_reg_offset = ((0xFFFFF & addr) - (VCN_VID_SOC_ADDRESS) + \ + (VCN_VID_IP_ADDRESS)); \ + else if (aon_range) \ + internal_reg_offset = ((0xFFFFF & addr) - (VCN_AON_SOC_ADDRESS) + \ + (VCN_AON_IP_ADDRESS)); \ + else \ + internal_reg_offset = (0xFFFFF & addr); \ + \ + internal_reg_offset >>= 2; \ + }) + +#define WREG32_SOC24_DPG_MODE(inst_idx, offset, value, mask_en, indirect) \ + do { \ + if (!indirect) { \ + WREG32_SOC15(VCN, GET_INST(VCN, inst_idx), \ + regUVD_DPG_LMA_DATA, value); \ + WREG32_SOC15( \ + VCN, GET_INST(VCN, inst_idx), \ + regUVD_DPG_LMA_CTL, \ + (0x1 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \ + mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \ + offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \ + } else { \ + *adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = \ + offset; \ + *adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = \ + value; \ + } \ + } while (0) + #define AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE (1 << 2) #define AMDGPU_FW_SHARED_FLAG_0_DRM_KEY_INJECT (1 << 4) #define AMDGPU_VCN_FW_SHARED_FLAG_0_RB (1 << 6) @@ -264,7 +306,7 @@ struct amdgpu_vcn_ras { struct amdgpu_vcn { unsigned fw_version; struct delayed_work idle_work; - const struct firmware *fw; /* VCN firmware */ + const struct firmware *fw[AMDGPU_MAX_VCN_INSTANCES]; /* VCN firmware */ unsigned num_enc_rings; enum amd_powergating_state cur_state; bool indirect_sram; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 0dcff2889e..7a4eae3677 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -71,59 +71,6 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev) amdgpu_num_kcq = 2; } -void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev, - uint32_t reg0, uint32_t reg1, - uint32_t ref, uint32_t mask, - uint32_t xcc_inst) -{ - struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_inst]; - struct amdgpu_ring *ring = &kiq->ring; - signed long r, cnt = 0; - unsigned long flags; - uint32_t seq; - - if (adev->mes.ring.sched.ready) { - amdgpu_mes_reg_write_reg_wait(adev, reg0, reg1, - ref, mask); - return; - } - - spin_lock_irqsave(&kiq->ring_lock, flags); - amdgpu_ring_alloc(ring, 32); - amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1, - ref, mask); - r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); - if (r) - goto failed_undo; - - amdgpu_ring_commit(ring); - spin_unlock_irqrestore(&kiq->ring_lock, flags); - - r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); - - /* don't wait anymore for IRQ context */ - if (r < 1 && in_interrupt()) - goto failed_kiq; - - might_sleep(); - while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { - - msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); - r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); - } - - if (cnt > MAX_KIQ_REG_TRY) - goto failed_kiq; - - return; - -failed_undo: - amdgpu_ring_undo(ring); - spin_unlock_irqrestore(&kiq->ring_lock, flags); -failed_kiq: - dev_err(adev->dev, "failed to write reg %x wait reg %x\n", reg0, reg1); -} - /** * amdgpu_virt_request_full_gpu() - request full gpu access * @adev: amdgpu device. @@ -303,11 +250,11 @@ static int amdgpu_virt_init_ras_err_handler_data(struct amdgpu_device *adev) if (!*data) goto data_failure; - bps = kmalloc_array(align_space, sizeof((*data)->bps), GFP_KERNEL); + bps = kmalloc_array(align_space, sizeof(*(*data)->bps), GFP_KERNEL); if (!bps) goto bps_failure; - bps_bo = kmalloc_array(align_space, sizeof((*data)->bps_bo), GFP_KERNEL); + bps_bo = kmalloc_array(align_space, sizeof(*(*data)->bps_bo), GFP_KERNEL); if (!bps_bo) goto bps_bo_failure; @@ -340,8 +287,10 @@ static void amdgpu_virt_ras_release_bp(struct amdgpu_device *adev) for (i = data->last_reserved - 1; i >= 0; i--) { bo = data->bps_bo[i]; - amdgpu_bo_free_kernel(&bo, NULL, NULL); - data->bps_bo[i] = bo; + if (bo) { + amdgpu_bo_free_kernel(&bo, NULL, NULL); + data->bps_bo[i] = bo; + } data->last_reserved = i; } } @@ -381,6 +330,8 @@ static void amdgpu_virt_ras_reserve_bps(struct amdgpu_device *adev) { struct amdgpu_virt *virt = &adev->virt; struct amdgpu_virt_ras_err_handler_data *data = virt->virt_eh_data; + struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr; + struct ttm_resource_manager *man = &mgr->manager; struct amdgpu_bo *bo = NULL; uint64_t bp; int i; @@ -396,12 +347,18 @@ static void amdgpu_virt_ras_reserve_bps(struct amdgpu_device *adev) * 2) a ras bad page has been reserved (duplicate error injection * for one page); */ - if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT, - AMDGPU_GPU_PAGE_SIZE, - &bo, NULL)) - DRM_DEBUG("RAS WARN: reserve vram for retired page %llx fail\n", bp); - - data->bps_bo[i] = bo; + if (ttm_resource_manager_used(man)) { + amdgpu_vram_mgr_reserve_range(&adev->mman.vram_mgr, + bp << AMDGPU_GPU_PAGE_SHIFT, + AMDGPU_GPU_PAGE_SIZE); + data->bps_bo[i] = NULL; + } else { + if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT, + AMDGPU_GPU_PAGE_SIZE, + &bo, NULL)) + DRM_DEBUG("RAS WARN: reserve vram for retired page %llx fail\n", bp); + data->bps_bo[i] = bo; + } data->last_reserved = i + 1; bo = NULL; } @@ -1022,7 +979,7 @@ u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 f * SCRATCH_REG0 = read/write value * SCRATCH_REG1[30:28] = command * SCRATCH_REG1[19:0] = address in dword - * SCRATCH_REG1[26:24] = Error reporting + * SCRATCH_REG1[27:24] = Error reporting */ writel(v, scratch_reg0); writel((offset | flag), scratch_reg1); @@ -1036,7 +993,8 @@ u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 f udelay(10); } - if (i >= timeout) { + tmp = readl(scratch_reg1); + if (i >= timeout || (tmp & AMDGPU_RLCG_SCRATCH1_ERROR_MASK) != 0) { if (amdgpu_sriov_rlcg_error_report_enabled(adev)) { if (tmp & AMDGPU_RLCG_VFGATE_DISABLED) { dev_err(adev->dev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index d4207e4414..3f59b7b552 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -45,6 +45,7 @@ #define AMDGPU_RLCG_REG_NOT_IN_RANGE 0x1000000 #define AMDGPU_RLCG_SCRATCH1_ADDRESS_MASK 0xFFFFF +#define AMDGPU_RLCG_SCRATCH1_ERROR_MASK 0xF000000 /* all asic after AI use this offset */ #define mmRCC_IOV_FUNC_IDENTIFIER 0xDE5 @@ -88,7 +89,8 @@ struct amdgpu_virt_ops { int (*wait_reset)(struct amdgpu_device *adev); void (*trans_msg)(struct amdgpu_device *adev, enum idh_request req, u32 data1, u32 data2, u32 data3); - void (*ras_poison_handler)(struct amdgpu_device *adev); + void (*ras_poison_handler)(struct amdgpu_device *adev, + enum amdgpu_ras_block block); }; /* @@ -332,10 +334,6 @@ static inline bool is_virtual_machine(void) ((adev)->virt.gim_feature & AMDGIM_FEATURE_VCN_RB_DECOUPLE) bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev); void amdgpu_virt_init_setting(struct amdgpu_device *adev); -void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev, - uint32_t reg0, uint32_t rreg1, - uint32_t ref, uint32_t mask, - uint32_t xcc_inst); int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init); int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init); int amdgpu_virt_reset_gpu(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c index 453a4b786c..8baa2e0935 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c @@ -660,8 +660,7 @@ static const struct amd_ip_funcs amdgpu_vkms_ip_funcs = { .set_powergating_state = amdgpu_vkms_set_powergating_state, }; -const struct amdgpu_ip_block_version amdgpu_vkms_ip_block = -{ +const struct amdgpu_ip_block_version amdgpu_vkms_ip_block = { .type = AMD_IP_BLOCK_TYPE_DCE, .major = 1, .minor = 0, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 5b50f73591..94089069c9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -234,6 +234,22 @@ static void amdgpu_vm_bo_invalidated(struct amdgpu_vm_bo_base *vm_bo) } /** + * amdgpu_vm_bo_evicted_user - vm_bo is evicted + * + * @vm_bo: vm_bo which is evicted + * + * State for BOs used by user mode queues which are not at the location they + * should be. + */ +static void amdgpu_vm_bo_evicted_user(struct amdgpu_vm_bo_base *vm_bo) +{ + vm_bo->moved = true; + spin_lock(&vm_bo->vm->status_lock); + list_move(&vm_bo->vm_status, &vm_bo->vm->evicted_user); + spin_unlock(&vm_bo->vm->status_lock); +} + +/** * amdgpu_vm_bo_relocated - vm_bo is reloacted * * @vm_bo: vm_bo which is relocated @@ -427,21 +443,25 @@ uint64_t amdgpu_vm_generation(struct amdgpu_device *adev, struct amdgpu_vm *vm) } /** - * amdgpu_vm_validate_pt_bos - validate the page table BOs + * amdgpu_vm_validate - validate evicted BOs tracked in the VM * * @adev: amdgpu device pointer * @vm: vm providing the BOs + * @ticket: optional reservation ticket used to reserve the VM * @validate: callback to do the validation * @param: parameter for the validation callback * - * Validate the page table BOs on command submission if neccessary. + * Validate the page table BOs and per-VM BOs on command submission if + * necessary. If a ticket is given, also try to validate evicted user queue + * BOs. They must already be reserved with the given ticket. * * Returns: * Validation result. */ -int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, - int (*validate)(void *p, struct amdgpu_bo *bo), - void *param) +int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm, + struct ww_acquire_ctx *ticket, + int (*validate)(void *p, struct amdgpu_bo *bo), + void *param) { struct amdgpu_vm_bo_base *bo_base; struct amdgpu_bo *shadow; @@ -484,6 +504,34 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, } spin_lock(&vm->status_lock); } + while (ticket && !list_empty(&vm->evicted_user)) { + bo_base = list_first_entry(&vm->evicted_user, + struct amdgpu_vm_bo_base, + vm_status); + spin_unlock(&vm->status_lock); + + bo = bo_base->bo; + + if (dma_resv_locking_ctx(bo->tbo.base.resv) != ticket) { + struct amdgpu_task_info *ti = amdgpu_vm_get_task_info_vm(vm); + + pr_warn_ratelimited("Evicted user BO is not reserved\n"); + if (ti) { + pr_warn_ratelimited("pid %d\n", ti->pid); + amdgpu_vm_put_task_info(ti); + } + + return -EINVAL; + } + + r = validate(param, bo); + if (r) + return r; + + amdgpu_vm_bo_invalidated(bo_base); + + spin_lock(&vm->status_lock); + } spin_unlock(&vm->status_lock); amdgpu_vm_eviction_lock(vm); @@ -610,7 +658,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool vm_flush_needed = job->vm_needs_flush; struct dma_fence *fence = NULL; bool pasid_mapping_needed = false; - unsigned patch_offset = 0; + unsigned int patch; int r; if (amdgpu_vmid_had_gpu_reset(adev, id)) { @@ -637,7 +685,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, amdgpu_ring_ib_begin(ring); if (ring->funcs->init_cond_exec) - patch_offset = amdgpu_ring_init_cond_exec(ring); + patch = amdgpu_ring_init_cond_exec(ring, + ring->cond_exe_gpu_addr); if (need_pipe_sync) amdgpu_ring_emit_pipeline_sync(ring); @@ -651,7 +700,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid); if (spm_update_needed && adev->gfx.rlc.funcs->update_spm_vmid) - adev->gfx.rlc.funcs->update_spm_vmid(adev, job->vmid); + adev->gfx.rlc.funcs->update_spm_vmid(adev, ring, job->vmid); if (!ring->is_mes_queue && ring->funcs->emit_gds_switch && gds_switch_needed) { @@ -685,8 +734,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, } dma_fence_put(fence); - if (ring->funcs->patch_cond_exec) - amdgpu_ring_patch_cond_exec(ring, patch_offset); + amdgpu_ring_patch_cond_exec(ring, patch); /* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */ if (ring->funcs->emit_switch_buffer) { @@ -1343,10 +1391,6 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, struct amdgpu_bo_va_mapping, list); list_del(&mapping->list); - if (vm->pte_support_ats && - mapping->start < AMDGPU_GMC_HOLE_START) - init_pte_value = AMDGPU_PTE_DEFAULT_ATC; - r = amdgpu_vm_update_range(adev, vm, false, false, true, false, resv, mapping->start, mapping->last, init_pte_value, 0, 0, NULL, NULL, @@ -1426,11 +1470,21 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, } r = amdgpu_vm_bo_update(adev, bo_va, clear); - if (r) - return r; if (unlock) dma_resv_unlock(resv); + if (r) + return r; + + /* Remember evicted DMABuf imports in compute VMs for later + * validation + */ + if (vm->is_compute_context && + bo_va->base.bo->tbo.base.import_attach && + (!bo_va->base.bo->tbo.resource || + bo_va->base.bo->tbo.resource->mem_type == TTM_PL_SYSTEM)) + amdgpu_vm_bo_evicted_user(&bo_va->base); + spin_lock(&vm->status_lock); } spin_unlock(&vm->status_lock); @@ -2193,6 +2247,108 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout) return dma_fence_wait_timeout(vm->last_unlocked, true, timeout); } +static void amdgpu_vm_destroy_task_info(struct kref *kref) +{ + struct amdgpu_task_info *ti = container_of(kref, struct amdgpu_task_info, refcount); + + kfree(ti); +} + +static inline struct amdgpu_vm * +amdgpu_vm_get_vm_from_pasid(struct amdgpu_device *adev, u32 pasid) +{ + struct amdgpu_vm *vm; + unsigned long flags; + + xa_lock_irqsave(&adev->vm_manager.pasids, flags); + vm = xa_load(&adev->vm_manager.pasids, pasid); + xa_unlock_irqrestore(&adev->vm_manager.pasids, flags); + + return vm; +} + +/** + * amdgpu_vm_put_task_info - reference down the vm task_info ptr + * + * @task_info: task_info struct under discussion. + * + * frees the vm task_info ptr at the last put + */ +void amdgpu_vm_put_task_info(struct amdgpu_task_info *task_info) +{ + kref_put(&task_info->refcount, amdgpu_vm_destroy_task_info); +} + +/** + * amdgpu_vm_get_task_info_vm - Extracts task info for a vm. + * + * @vm: VM to get info from + * + * Returns the reference counted task_info structure, which must be + * referenced down with amdgpu_vm_put_task_info. + */ +struct amdgpu_task_info * +amdgpu_vm_get_task_info_vm(struct amdgpu_vm *vm) +{ + struct amdgpu_task_info *ti = NULL; + + if (vm) { + ti = vm->task_info; + kref_get(&vm->task_info->refcount); + } + + return ti; +} + +/** + * amdgpu_vm_get_task_info_pasid - Extracts task info for a PASID. + * + * @adev: drm device pointer + * @pasid: PASID identifier for VM + * + * Returns the reference counted task_info structure, which must be + * referenced down with amdgpu_vm_put_task_info. + */ +struct amdgpu_task_info * +amdgpu_vm_get_task_info_pasid(struct amdgpu_device *adev, u32 pasid) +{ + return amdgpu_vm_get_task_info_vm( + amdgpu_vm_get_vm_from_pasid(adev, pasid)); +} + +static int amdgpu_vm_create_task_info(struct amdgpu_vm *vm) +{ + vm->task_info = kzalloc(sizeof(struct amdgpu_task_info), GFP_KERNEL); + if (!vm->task_info) + return -ENOMEM; + + kref_init(&vm->task_info->refcount); + return 0; +} + +/** + * amdgpu_vm_set_task_info - Sets VMs task info. + * + * @vm: vm for which to set the info + */ +void amdgpu_vm_set_task_info(struct amdgpu_vm *vm) +{ + if (!vm->task_info) + return; + + if (vm->task_info->pid == current->pid) + return; + + vm->task_info->pid = current->pid; + get_task_comm(vm->task_info->task_name, current); + + if (current->group_leader->mm != current->mm) + return; + + vm->task_info->tgid = current->group_leader->pid; + get_task_comm(vm->task_info->process_name, current->group_leader); +} + /** * amdgpu_vm_init - initialize a vm instance * @@ -2216,6 +2372,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) vm->reserved_vmid[i] = NULL; INIT_LIST_HEAD(&vm->evicted); + INIT_LIST_HEAD(&vm->evicted_user); INIT_LIST_HEAD(&vm->relocated); INIT_LIST_HEAD(&vm->moved); INIT_LIST_HEAD(&vm->idle); @@ -2231,7 +2388,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, if (r) return r; - vm->pte_support_ats = false; vm->is_compute_context = false; vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & @@ -2278,6 +2434,10 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, if (r) goto error_free_root; + r = amdgpu_vm_create_task_info(vm); + if (r) + DRM_DEBUG("Failed to create task info for VM\n"); + amdgpu_bo_unreserve(vm->root.bo); amdgpu_bo_unref(&root_bo); @@ -2317,30 +2477,12 @@ error_free_delayed: */ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) { - bool pte_support_ats = (adev->asic_type == CHIP_RAVEN); int r; r = amdgpu_bo_reserve(vm->root.bo, true); if (r) return r; - /* Check if PD needs to be reinitialized and do it before - * changing any other state, in case it fails. - */ - if (pte_support_ats != vm->pte_support_ats) { - /* Sanity checks */ - if (!amdgpu_vm_pt_is_root_clean(adev, vm)) { - r = -EINVAL; - goto unreserve_bo; - } - - vm->pte_support_ats = pte_support_ats; - r = amdgpu_vm_pt_clear(adev, vm, to_amdgpu_bo_vm(vm->root.bo), - false); - if (r) - goto unreserve_bo; - } - /* Update VM state */ vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & AMDGPU_VM_USE_CPU_FOR_COMPUTE); @@ -2417,6 +2559,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) root = amdgpu_bo_ref(vm->root.bo); amdgpu_bo_reserve(root, true); + amdgpu_vm_put_task_info(vm->task_info); amdgpu_vm_set_pasid(adev, vm, 0); dma_fence_wait(vm->last_unlocked, false); dma_fence_put(vm->last_unlocked); @@ -2574,48 +2717,6 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) } /** - * amdgpu_vm_get_task_info - Extracts task info for a PASID. - * - * @adev: drm device pointer - * @pasid: PASID identifier for VM - * @task_info: task_info to fill. - */ -void amdgpu_vm_get_task_info(struct amdgpu_device *adev, u32 pasid, - struct amdgpu_task_info *task_info) -{ - struct amdgpu_vm *vm; - unsigned long flags; - - xa_lock_irqsave(&adev->vm_manager.pasids, flags); - - vm = xa_load(&adev->vm_manager.pasids, pasid); - if (vm) - *task_info = vm->task_info; - - xa_unlock_irqrestore(&adev->vm_manager.pasids, flags); -} - -/** - * amdgpu_vm_set_task_info - Sets VMs task info. - * - * @vm: vm for which to set the info - */ -void amdgpu_vm_set_task_info(struct amdgpu_vm *vm) -{ - if (vm->task_info.pid) - return; - - vm->task_info.pid = current->pid; - get_task_comm(vm->task_info.task_name, current); - - if (current->group_leader->mm != current->mm) - return; - - vm->task_info.tgid = current->group_leader->pid; - get_task_comm(vm->task_info.process_name, current->group_leader); -} - -/** * amdgpu_vm_handle_fault - graceful handling of VM faults. * @adev: amdgpu device pointer * @pasid: PASID of the VM diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 4740dd65b9..047ec1930d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -135,8 +135,21 @@ struct amdgpu_mem_stats; #define AMDGPU_IS_MMHUB0(x) ((x) >= AMDGPU_MMHUB0_START && (x) < AMDGPU_MMHUB1_START) #define AMDGPU_IS_MMHUB1(x) ((x) >= AMDGPU_MMHUB1_START && (x) < AMDGPU_MAX_VMHUBS) -/* Reserve 2MB at top/bottom of address space for kernel use */ -#define AMDGPU_VA_RESERVED_SIZE (2ULL << 20) +/* Reserve space at top/bottom of address space for kernel use */ +#define AMDGPU_VA_RESERVED_CSA_SIZE (2ULL << 20) +#define AMDGPU_VA_RESERVED_CSA_START(adev) (((adev)->vm_manager.max_pfn \ + << AMDGPU_GPU_PAGE_SHIFT) \ + - AMDGPU_VA_RESERVED_CSA_SIZE) +#define AMDGPU_VA_RESERVED_SEQ64_SIZE (2ULL << 20) +#define AMDGPU_VA_RESERVED_SEQ64_START(adev) (AMDGPU_VA_RESERVED_CSA_START(adev) \ + - AMDGPU_VA_RESERVED_SEQ64_SIZE) +#define AMDGPU_VA_RESERVED_TRAP_SIZE (2ULL << 12) +#define AMDGPU_VA_RESERVED_TRAP_START(adev) (AMDGPU_VA_RESERVED_SEQ64_START(adev) \ + - AMDGPU_VA_RESERVED_TRAP_SIZE) +#define AMDGPU_VA_RESERVED_BOTTOM (1ULL << 16) +#define AMDGPU_VA_RESERVED_TOP (AMDGPU_VA_RESERVED_TRAP_SIZE + \ + AMDGPU_VA_RESERVED_SEQ64_SIZE + \ + AMDGPU_VA_RESERVED_CSA_SIZE) /* See vm_update_mode */ #define AMDGPU_VM_USE_CPU_FOR_GFX (1 << 0) @@ -190,10 +203,11 @@ struct amdgpu_vm_pte_funcs { }; struct amdgpu_task_info { - char process_name[TASK_COMM_LEN]; - char task_name[TASK_COMM_LEN]; - pid_t pid; - pid_t tgid; + char process_name[TASK_COMM_LEN]; + char task_name[TASK_COMM_LEN]; + pid_t pid; + pid_t tgid; + struct kref refcount; }; /** @@ -288,9 +302,12 @@ struct amdgpu_vm { /* Lock to protect vm_bo add/del/move on all lists of vm */ spinlock_t status_lock; - /* BOs who needs a validation */ + /* Per-VM and PT BOs who needs a validation */ struct list_head evicted; + /* BOs for user mode queues that need a validation */ + struct list_head evicted_user; + /* PT BOs which relocated and their parent need an update */ struct list_head relocated; @@ -341,9 +358,6 @@ struct amdgpu_vm { /* Functions to use for VM table updates */ const struct amdgpu_vm_update_funcs *update_funcs; - /* Flag to indicate ATS support from PTE for GFX9 */ - bool pte_support_ats; - /* Up to 128 pending retry page faults */ DECLARE_KFIFO(faults, u64, 128); @@ -357,7 +371,7 @@ struct amdgpu_vm { uint64_t pd_phys_addr; /* Some basic info about the task */ - struct amdgpu_task_info task_info; + struct amdgpu_task_info *task_info; /* Store positions of group of BOs */ struct ttm_lru_bulk_move lru_bulk_move; @@ -434,9 +448,10 @@ int amdgpu_vm_lock_pd(struct amdgpu_vm *vm, struct drm_exec *exec, unsigned int num_fences); bool amdgpu_vm_ready(struct amdgpu_vm *vm); uint64_t amdgpu_vm_generation(struct amdgpu_device *adev, struct amdgpu_vm *vm); -int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, - int (*callback)(void *p, struct amdgpu_bo *bo), - void *param); +int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm, + struct ww_acquire_ctx *ticket, + int (*callback)(void *p, struct amdgpu_bo *bo), + void *param); int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync); int amdgpu_vm_update_pdes(struct amdgpu_device *adev, struct amdgpu_vm *vm, bool immediate); @@ -497,8 +512,14 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring, struct amdgpu_job *job); void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev); -void amdgpu_vm_get_task_info(struct amdgpu_device *adev, u32 pasid, - struct amdgpu_task_info *task_info); +struct amdgpu_task_info * +amdgpu_vm_get_task_info_pasid(struct amdgpu_device *adev, u32 pasid); + +struct amdgpu_task_info * +amdgpu_vm_get_task_info_vm(struct amdgpu_vm *vm); + +void amdgpu_vm_put_task_info(struct amdgpu_task_info *task_info); + bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, u32 vmid, u32 node_id, uint64_t addr, bool write_fault); @@ -516,8 +537,6 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm, int level, bool immediate, struct amdgpu_bo_vm **vmbo, int32_t xcp_id); void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct amdgpu_vm *vm); -bool amdgpu_vm_pt_is_root_clean(struct amdgpu_device *adev, - struct amdgpu_vm *vm); int amdgpu_vm_pde_update(struct amdgpu_vm_update_params *params, struct amdgpu_vm_bo_base *entry); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c index a160265ddc..124389a6bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c @@ -90,22 +90,6 @@ static unsigned int amdgpu_vm_pt_num_entries(struct amdgpu_device *adev, } /** - * amdgpu_vm_pt_num_ats_entries - return the number of ATS entries in the root PD - * - * @adev: amdgpu_device pointer - * - * Returns: - * The number of entries in the root page directory which needs the ATS setting. - */ -static unsigned int amdgpu_vm_pt_num_ats_entries(struct amdgpu_device *adev) -{ - unsigned int shift; - - shift = amdgpu_vm_pt_level_shift(adev, adev->vm_manager.root_level); - return AMDGPU_GMC_HOLE_START >> (shift + AMDGPU_GPU_PAGE_SHIFT); -} - -/** * amdgpu_vm_pt_entries_mask - the mask to get the entry number of a PD/PT * * @adev: amdgpu_device pointer @@ -379,7 +363,7 @@ int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct ttm_operation_ctx ctx = { true, false }; struct amdgpu_vm_update_params params; struct amdgpu_bo *ancestor = &vmbo->bo; - unsigned int entries, ats_entries; + unsigned int entries; struct amdgpu_bo *bo = &vmbo->bo; uint64_t addr; int r, idx; @@ -394,27 +378,6 @@ int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm, } entries = amdgpu_bo_size(bo) / 8; - if (!vm->pte_support_ats) { - ats_entries = 0; - - } else if (!bo->parent) { - ats_entries = amdgpu_vm_pt_num_ats_entries(adev); - ats_entries = min(ats_entries, entries); - entries -= ats_entries; - - } else { - struct amdgpu_vm_bo_base *pt; - - pt = ancestor->vm_bo; - ats_entries = amdgpu_vm_pt_num_ats_entries(adev); - if ((pt - to_amdgpu_bo_vm(vm->root.bo)->entries) >= - ats_entries) { - ats_entries = 0; - } else { - ats_entries = entries; - entries = 0; - } - } r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); if (r) @@ -445,44 +408,24 @@ int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm, goto exit; addr = 0; - if (ats_entries) { - uint64_t value = 0, flags; - flags = AMDGPU_PTE_DEFAULT_ATC; + uint64_t value = 0, flags = 0; + if (adev->asic_type >= CHIP_VEGA10) { if (level != AMDGPU_VM_PTB) { /* Handle leaf PDEs as PTEs */ flags |= AMDGPU_PDE_PTE; - amdgpu_gmc_get_vm_pde(adev, level, &value, &flags); + amdgpu_gmc_get_vm_pde(adev, level, + &value, &flags); + } else { + /* Workaround for fault priority problem on GMC9 */ + flags = AMDGPU_PTE_EXECUTABLE; } - - r = vm->update_funcs->update(¶ms, vmbo, addr, 0, - ats_entries, value, flags); - if (r) - goto exit; - - addr += ats_entries * 8; } - if (entries) { - uint64_t value = 0, flags = 0; - - if (adev->asic_type >= CHIP_VEGA10) { - if (level != AMDGPU_VM_PTB) { - /* Handle leaf PDEs as PTEs */ - flags |= AMDGPU_PDE_PTE; - amdgpu_gmc_get_vm_pde(adev, level, - &value, &flags); - } else { - /* Workaround for fault priority problem on GMC9 */ - flags = AMDGPU_PTE_EXECUTABLE; - } - } - - r = vm->update_funcs->update(¶ms, vmbo, addr, 0, entries, - value, flags); - if (r) - goto exit; - } + r = vm->update_funcs->update(¶ms, vmbo, addr, 0, entries, + value, flags); + if (r) + goto exit; r = vm->update_funcs->commit(¶ms, NULL); exit: @@ -728,33 +671,6 @@ void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct amdgpu_vm *vm) } /** - * amdgpu_vm_pt_is_root_clean - check if a root PD is clean - * - * @adev: amdgpu_device pointer - * @vm: the VM to check - * - * Check all entries of the root PD, if any subsequent PDs are allocated, - * it means there are page table creating and filling, and is no a clean - * VM - * - * Returns: - * 0 if this VM is clean - */ -bool amdgpu_vm_pt_is_root_clean(struct amdgpu_device *adev, - struct amdgpu_vm *vm) -{ - enum amdgpu_vm_level root = adev->vm_manager.root_level; - unsigned int entries = amdgpu_vm_pt_num_entries(adev, root); - unsigned int i = 0; - - for (i = 0; i < entries; i++) { - if (to_amdgpu_bo_vm(vm->root.bo)->entries[i].bo) - return false; - } - return true; -} - -/** * amdgpu_vm_pde_update - update a single level in the hierarchy * * @params: parameters for the update @@ -1027,7 +943,7 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params, trace_amdgpu_vm_update_ptes(params, frag_start, upd_end, min(nptes, 32u), dst, incr, upd_flags, - vm->task_info.tgid, + vm->task_info ? vm->task_info->tgid : 0, vm->immediate.fence_context); amdgpu_vm_pte_update_flags(params, to_amdgpu_bo_vm(pt), cursor.level, pe_start, dst, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c index ad44012cc0..c23d97d34b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c @@ -205,7 +205,7 @@ disable_dpm: dpm_ctl &= 0xfffffffe; /* Disable DPM */ WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_enable), dpm_ctl); dev_dbg(adev->dev, "%s: disable vpe dpm\n", __func__); - return 0; + return -EINVAL; } int amdgpu_vpe_psp_update_sram(struct amdgpu_device *adev) @@ -297,6 +297,10 @@ static int vpe_early_init(void *handle) case IP_VERSION(6, 1, 0): vpe_v6_1_set_funcs(vpe); break; + case IP_VERSION(6, 1, 1): + vpe_v6_1_set_funcs(vpe); + vpe->collaborate_mode = true; + break; default: return -EINVAL; } @@ -304,6 +308,8 @@ static int vpe_early_init(void *handle) vpe_set_ring_funcs(adev); vpe_set_regs(vpe); + dev_info(adev->dev, "VPE: collaborate mode %s", vpe->collaborate_mode ? "true" : "false"); + return 0; } @@ -463,6 +469,18 @@ static uint64_t vpe_get_csa_mc_addr(struct amdgpu_ring *ring, uint32_t vmid) return csa_mc_addr; } +static void vpe_ring_emit_pred_exec(struct amdgpu_ring *ring, + uint32_t device_select, + uint32_t exec_count) +{ + if (!ring->adev->vpe.collaborate_mode) + return; + + amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_PRED_EXE, 0) | + (device_select << 16)); + amdgpu_ring_write(ring, exec_count & 0x1fff); +} + static void vpe_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job, struct amdgpu_ib *ib, @@ -511,6 +529,8 @@ static void vpe_ring_emit_pipeline_sync(struct amdgpu_ring *ring) uint32_t seq = ring->fence_drv.sync_seq; uint64_t addr = ring->fence_drv.gpu_addr; + vpe_ring_emit_pred_exec(ring, 0, 6); + /* wait for idle */ amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_POLL_REGMEM, VPE_POLL_REGMEM_SUBOP_REGMEM) | @@ -526,6 +546,8 @@ static void vpe_ring_emit_pipeline_sync(struct amdgpu_ring *ring) static void vpe_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val) { + vpe_ring_emit_pred_exec(ring, 0, 3); + amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_REG_WRITE, 0)); amdgpu_ring_write(ring, reg << 2); amdgpu_ring_write(ring, val); @@ -534,6 +556,8 @@ static void vpe_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t static void vpe_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, uint32_t val, uint32_t mask) { + vpe_ring_emit_pred_exec(ring, 0, 6); + amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_POLL_REGMEM, VPE_POLL_REGMEM_SUBOP_REGMEM) | VPE_CMD_POLL_REGMEM_HEADER_FUNC(3) | /* equal */ @@ -552,34 +576,21 @@ static void vpe_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned int vmid, amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); } -static unsigned int vpe_ring_init_cond_exec(struct amdgpu_ring *ring) +static unsigned int vpe_ring_init_cond_exec(struct amdgpu_ring *ring, + uint64_t addr) { unsigned int ret; amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_COND_EXE, 0)); - amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); - amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); amdgpu_ring_write(ring, 1); - ret = ring->wptr & ring->buf_mask;/* this is the offset we need patch later */ - amdgpu_ring_write(ring, 0x55aa55aa);/* insert dummy here and patch it later */ + ret = ring->wptr & ring->buf_mask; + amdgpu_ring_write(ring, 0); return ret; } -static void vpe_ring_patch_cond_exec(struct amdgpu_ring *ring, unsigned int offset) -{ - unsigned int cur; - - WARN_ON_ONCE(offset > ring->buf_mask); - WARN_ON_ONCE(ring->ring[offset] != 0x55aa55aa); - - cur = (ring->wptr - 1) & ring->buf_mask; - if (cur > offset) - ring->ring[offset] = cur - offset; - else - ring->ring[offset] = (ring->buf_mask + 1) - offset + cur; -} - static int vpe_ring_preempt_ib(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -701,16 +712,22 @@ static void vpe_ring_set_wptr(struct amdgpu_ring *ring) upper_32_bits(ring->wptr << 2)); atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr << 2); WDOORBELL64(ring->doorbell_index, ring->wptr << 2); + if (vpe->collaborate_mode) + WDOORBELL64(ring->doorbell_index + 4, ring->wptr << 2); } else { - dev_dbg(adev->dev, "Not using doorbell, \ - regVPEC_QUEUE0_RB_WPTR == 0x%08x, \ - regVPEC_QUEUE0_RB_WPTR_HI == 0x%08x\n", - lower_32_bits(ring->wptr << 2), - upper_32_bits(ring->wptr << 2)); - WREG32(vpe_get_reg_offset(vpe, ring->me, vpe->regs.queue0_rb_wptr_lo), - lower_32_bits(ring->wptr << 2)); - WREG32(vpe_get_reg_offset(vpe, ring->me, vpe->regs.queue0_rb_wptr_hi), - upper_32_bits(ring->wptr << 2)); + int i; + + for (i = 0; i < vpe->num_instances; i++) { + dev_dbg(adev->dev, "Not using doorbell, \ + regVPEC_QUEUE0_RB_WPTR == 0x%08x, \ + regVPEC_QUEUE0_RB_WPTR_HI == 0x%08x\n", + lower_32_bits(ring->wptr << 2), + upper_32_bits(ring->wptr << 2)); + WREG32(vpe_get_reg_offset(vpe, i, vpe->regs.queue0_rb_wptr_lo), + lower_32_bits(ring->wptr << 2)); + WREG32(vpe_get_reg_offset(vpe, i, vpe->regs.queue0_rb_wptr_hi), + upper_32_bits(ring->wptr << 2)); + } } } @@ -870,7 +887,6 @@ static const struct amdgpu_ring_funcs vpe_ring_funcs = { .test_ring = vpe_ring_test_ring, .test_ib = vpe_ring_test_ib, .init_cond_exec = vpe_ring_init_cond_exec, - .patch_cond_exec = vpe_ring_patch_cond_exec, .preempt_ib = vpe_ring_preempt_ib, .begin_use = vpe_ring_begin_use, .end_use = vpe_ring_end_use, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h index 1153ddaea6..231d86d095 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h @@ -27,6 +27,8 @@ #include "amdgpu_irq.h" #include "vpe_6_1_fw_if.h" +#define AMDGPU_MAX_VPE_INSTANCES 2 + struct amdgpu_vpe; struct vpe_funcs { @@ -74,6 +76,9 @@ struct amdgpu_vpe { uint32_t *cmdbuf_cpu_addr; struct delayed_work idle_work; bool context_started; + + uint32_t num_instances; + bool collaborate_mode; }; int amdgpu_vpe_psp_update_sram(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index a6c88f2fe6..20d51f6c9b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -1035,15 +1035,74 @@ int amdgpu_xgmi_remove_device(struct amdgpu_device *adev) return 0; } +static int xgmi_v6_4_0_aca_bank_generate_report(struct aca_handle *handle, struct aca_bank *bank, enum aca_error_type type, + struct aca_bank_report *report, void *data) +{ + struct amdgpu_device *adev = handle->adev; + const char *error_str; + u64 status; + int ret, ext_error_code; + + ret = aca_bank_info_decode(bank, &report->info); + if (ret) + return ret; + + status = bank->regs[ACA_REG_IDX_STATUS]; + ext_error_code = ACA_REG__STATUS__ERRORCODEEXT(status); + + error_str = ext_error_code < ARRAY_SIZE(xgmi_v6_4_0_ras_error_code_ext) ? + xgmi_v6_4_0_ras_error_code_ext[ext_error_code] : NULL; + if (error_str) + dev_info(adev->dev, "%s detected\n", error_str); + + if ((type == ACA_ERROR_TYPE_UE && ext_error_code == 0) || + (type == ACA_ERROR_TYPE_CE && ext_error_code == 6)) + report->count[type] = ACA_REG__MISC0__ERRCNT(bank->regs[ACA_REG_IDX_MISC0]); + + return 0; +} + +static const struct aca_bank_ops xgmi_v6_4_0_aca_bank_ops = { + .aca_bank_generate_report = xgmi_v6_4_0_aca_bank_generate_report, +}; + +static const struct aca_info xgmi_v6_4_0_aca_info = { + .hwip = ACA_HWIP_TYPE_PCS_XGMI, + .mask = ACA_ERROR_UE_MASK | ACA_ERROR_CE_MASK, + .bank_ops = &xgmi_v6_4_0_aca_bank_ops, +}; + static int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) { + int r; + if (!adev->gmc.xgmi.supported || adev->gmc.xgmi.num_physical_nodes == 0) return 0; amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL); - return amdgpu_ras_block_late_init(adev, ras_block); + r = amdgpu_ras_block_late_init(adev, ras_block); + if (r) + return r; + + switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) { + case IP_VERSION(6, 4, 0): + r = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL, + &xgmi_v6_4_0_aca_info, NULL); + if (r) + goto late_fini; + break; + default: + break; + } + + return 0; + +late_fini: + amdgpu_ras_block_late_fini(adev, ras_block); + + return r; } uint64_t amdgpu_xgmi_get_relative_phy_addr(struct amdgpu_device *adev, @@ -1099,7 +1158,7 @@ static void amdgpu_xgmi_legacy_reset_ras_error_count(struct amdgpu_device *adev) static void __xgmi_v6_4_0_reset_error_count(struct amdgpu_device *adev, int xgmi_inst, u64 mca_base) { - WREG64_MCA(xgmi_inst, mca_base, MCA_REG_IDX_STATUS, 0ULL); + WREG64_MCA(xgmi_inst, mca_base, ACA_REG_IDX_STATUS, 0ULL); } static void xgmi_v6_4_0_reset_error_count(struct amdgpu_device *adev, int xgmi_inst) @@ -1277,12 +1336,12 @@ static void amdgpu_xgmi_legacy_query_ras_error_count(struct amdgpu_device *adev, err_data->ce_count += ce_cnt; } -static enum amdgpu_mca_error_type xgmi_v6_4_0_pcs_mca_get_error_type(struct amdgpu_device *adev, u64 status) +static enum aca_error_type xgmi_v6_4_0_pcs_mca_get_error_type(struct amdgpu_device *adev, u64 status) { const char *error_str; int ext_error_code; - ext_error_code = MCA_REG__STATUS__ERRORCODEEXT(status); + ext_error_code = ACA_REG__STATUS__ERRORCODEEXT(status); error_str = ext_error_code < ARRAY_SIZE(xgmi_v6_4_0_ras_error_code_ext) ? xgmi_v6_4_0_ras_error_code_ext[ext_error_code] : NULL; @@ -1291,9 +1350,9 @@ static enum amdgpu_mca_error_type xgmi_v6_4_0_pcs_mca_get_error_type(struct amdg switch (ext_error_code) { case 0: - return AMDGPU_MCA_ERROR_TYPE_UE; + return ACA_ERROR_TYPE_UE; case 6: - return AMDGPU_MCA_ERROR_TYPE_CE; + return ACA_ERROR_TYPE_CE; default: return -EINVAL; } @@ -1307,22 +1366,22 @@ static void __xgmi_v6_4_0_query_error_count(struct amdgpu_device *adev, struct a int xgmi_inst = mcm_info->die_id; u64 status = 0; - status = RREG64_MCA(xgmi_inst, mca_base, MCA_REG_IDX_STATUS); - if (!MCA_REG__STATUS__VAL(status)) + status = RREG64_MCA(xgmi_inst, mca_base, ACA_REG_IDX_STATUS); + if (!ACA_REG__STATUS__VAL(status)) return; switch (xgmi_v6_4_0_pcs_mca_get_error_type(adev, status)) { - case AMDGPU_MCA_ERROR_TYPE_UE: + case ACA_ERROR_TYPE_UE: amdgpu_ras_error_statistic_ue_count(err_data, mcm_info, NULL, 1ULL); break; - case AMDGPU_MCA_ERROR_TYPE_CE: + case ACA_ERROR_TYPE_CE: amdgpu_ras_error_statistic_ce_count(err_data, mcm_info, NULL, 1ULL); break; default: break; } - WREG64_MCA(xgmi_inst, mca_base, MCA_REG_IDX_STATUS, 0ULL); + WREG64_MCA(xgmi_inst, mca_base, ACA_REG_IDX_STATUS, 0ULL); } static void xgmi_v6_4_0_query_error_count(struct amdgpu_device *adev, int xgmi_inst, struct ras_err_data *err_data) diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v4_1_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v4_1_0.c new file mode 100644 index 0000000000..8a0773b808 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/athub_v4_1_0.c @@ -0,0 +1,122 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "amdgpu.h" +#include "athub_v4_1_0.h" +#include "athub/athub_4_1_0_offset.h" +#include "athub/athub_4_1_0_sh_mask.h" +#include "soc15_common.h" + +static uint32_t athub_v4_1_0_get_cg_cntl(struct amdgpu_device *adev) +{ + uint32_t data; + + switch (amdgpu_ip_version(adev, ATHUB_HWIP, 0)) { + case IP_VERSION(4, 1, 0): + data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL); + break; + default: + data = 0; + break; + } + return data; +} + +static void athub_v4_1_0_set_cg_cntl(struct amdgpu_device *adev, uint32_t data) +{ + switch (amdgpu_ip_version(adev, ATHUB_HWIP, 0)) { + case IP_VERSION(4, 1, 0): + WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL, data); + break; + default: + break; + } +} + +static void +athub_v4_1_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + def = data = athub_v4_1_0_get_cg_cntl(adev); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ATHUB_MGCG)) + data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK; + else + data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK; + + if (def != data) + athub_v4_1_0_set_cg_cntl(adev, data); +} + +static void +athub_v4_1_0_update_medium_grain_light_sleep(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + def = data = athub_v4_1_0_get_cg_cntl(adev); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ATHUB_LS)) + data |= ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK; + else + data &= ~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK; + + if (def != data) + athub_v4_1_0_set_cg_cntl(adev, data); +} + +int athub_v4_1_0_set_clockgating(struct amdgpu_device *adev, + enum amd_clockgating_state state) +{ + if (amdgpu_sriov_vf(adev)) + return 0; + + switch (amdgpu_ip_version(adev, ATHUB_HWIP, 0)) { + case IP_VERSION(4, 1, 0): + athub_v4_1_0_update_medium_grain_clock_gating(adev, + state == AMD_CG_STATE_GATE); + athub_v4_1_0_update_medium_grain_light_sleep(adev, + state == AMD_CG_STATE_GATE); + break; + default: + break; + } + + return 0; +} + +void athub_v4_1_0_get_clockgating(struct amdgpu_device *adev, u64 *flags) +{ + int data; + + /* AMD_CG_SUPPORT_ATHUB_MGCG */ + data = athub_v4_1_0_get_cg_cntl(adev); + if (data & ATHUB_MISC_CNTL__CG_ENABLE_MASK) + *flags |= AMD_CG_SUPPORT_ATHUB_MGCG; + + /* AMD_CG_SUPPORT_ATHUB_LS */ + if (data & ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK) + *flags |= AMD_CG_SUPPORT_ATHUB_LS; +} diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v4_1_0.h b/drivers/gpu/drm/amd/amdgpu/athub_v4_1_0.h new file mode 100644 index 0000000000..4d18d0998f --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/athub_v4_1_0.h @@ -0,0 +1,30 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __ATHUB_V4_1_0_H__ +#define __ATHUB_V4_1_0_H__ + +int athub_v4_1_0_set_clockgating(struct amdgpu_device *adev, + enum amd_clockgating_state state); +void athub_v4_1_0_get_clockgating(struct amdgpu_device *adev, u64 *flags); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/atom.c b/drivers/gpu/drm/amd/amdgpu/atom.c index ac51d19b51..72362df352 100644 --- a/drivers/gpu/drm/amd/amdgpu/atom.c +++ b/drivers/gpu/drm/amd/amdgpu/atom.c @@ -62,6 +62,7 @@ typedef struct { struct atom_context *ctx; uint32_t *ps, *ws; + int ps_size, ws_size; int ps_shift; uint16_t start; unsigned last_jump; @@ -70,8 +71,8 @@ typedef struct { } atom_exec_context; int amdgpu_atom_debug; -static int amdgpu_atom_execute_table_locked(struct atom_context *ctx, int index, uint32_t *params); -int amdgpu_atom_execute_table(struct atom_context *ctx, int index, uint32_t *params); +static int amdgpu_atom_execute_table_locked(struct atom_context *ctx, int index, uint32_t *params, int params_size); +int amdgpu_atom_execute_table(struct atom_context *ctx, int index, uint32_t *params, int params_size); static uint32_t atom_arg_mask[8] = { 0xFFFFFFFF, 0xFFFF, 0xFFFF00, 0xFFFF0000, 0xFF, 0xFF00, 0xFF0000, @@ -223,7 +224,10 @@ static uint32_t atom_get_src_int(atom_exec_context *ctx, uint8_t attr, (*ptr)++; /* get_unaligned_le32 avoids unaligned accesses from atombios * tables, noticed on a DEC Alpha. */ - val = get_unaligned_le32((u32 *)&ctx->ps[idx]); + if (idx < ctx->ps_size) + val = get_unaligned_le32((u32 *)&ctx->ps[idx]); + else + pr_info("PS index out of range: %i > %i\n", idx, ctx->ps_size); if (print) DEBUG("PS[0x%02X,0x%04X]", idx, val); break; @@ -261,7 +265,10 @@ static uint32_t atom_get_src_int(atom_exec_context *ctx, uint8_t attr, val = gctx->reg_block; break; default: - val = ctx->ws[idx]; + if (idx < ctx->ws_size) + val = ctx->ws[idx]; + else + pr_info("WS index out of range: %i > %i\n", idx, ctx->ws_size); } break; case ATOM_ARG_ID: @@ -495,6 +502,10 @@ static void atom_put_dst(atom_exec_context *ctx, int arg, uint8_t attr, idx = U8(*ptr); (*ptr)++; DEBUG("PS[0x%02X]", idx); + if (idx >= ctx->ps_size) { + pr_info("PS index out of range: %i > %i\n", idx, ctx->ps_size); + return; + } ctx->ps[idx] = cpu_to_le32(val); break; case ATOM_ARG_WS: @@ -527,6 +538,10 @@ static void atom_put_dst(atom_exec_context *ctx, int arg, uint8_t attr, gctx->reg_block = val; break; default: + if (idx >= ctx->ws_size) { + pr_info("WS index out of range: %i > %i\n", idx, ctx->ws_size); + return; + } ctx->ws[idx] = val; } break; @@ -624,7 +639,7 @@ static void atom_op_calltable(atom_exec_context *ctx, int *ptr, int arg) else SDEBUG(" table: %d\n", idx); if (U16(ctx->ctx->cmd_table + 4 + 2 * idx)) - r = amdgpu_atom_execute_table_locked(ctx->ctx, idx, ctx->ps + ctx->ps_shift); + r = amdgpu_atom_execute_table_locked(ctx->ctx, idx, ctx->ps + ctx->ps_shift, ctx->ps_size - ctx->ps_shift); if (r) { ctx->abort = true; } @@ -1203,7 +1218,7 @@ static struct { atom_op_div32, ATOM_ARG_WS}, }; -static int amdgpu_atom_execute_table_locked(struct atom_context *ctx, int index, uint32_t *params) +static int amdgpu_atom_execute_table_locked(struct atom_context *ctx, int index, uint32_t *params, int params_size) { int base = CU16(ctx->cmd_table + 4 + 2 * index); int len, ws, ps, ptr; @@ -1225,12 +1240,16 @@ static int amdgpu_atom_execute_table_locked(struct atom_context *ctx, int index, ectx.ps_shift = ps / 4; ectx.start = base; ectx.ps = params; + ectx.ps_size = params_size; ectx.abort = false; ectx.last_jump = 0; - if (ws) + if (ws) { ectx.ws = kcalloc(4, ws, GFP_KERNEL); - else + ectx.ws_size = ws; + } else { ectx.ws = NULL; + ectx.ws_size = 0; + } debug_depth++; while (1) { @@ -1264,7 +1283,7 @@ free: return ret; } -int amdgpu_atom_execute_table(struct atom_context *ctx, int index, uint32_t *params) +int amdgpu_atom_execute_table(struct atom_context *ctx, int index, uint32_t *params, int params_size) { int r; @@ -1280,7 +1299,7 @@ int amdgpu_atom_execute_table(struct atom_context *ctx, int index, uint32_t *par /* reset divmul */ ctx->divmul[0] = 0; ctx->divmul[1] = 0; - r = amdgpu_atom_execute_table_locked(ctx, index, params); + r = amdgpu_atom_execute_table_locked(ctx, index, params, params_size); mutex_unlock(&ctx->mutex); return r; } @@ -1552,7 +1571,7 @@ int amdgpu_atom_asic_init(struct atom_context *ctx) if (!CU16(ctx->cmd_table + 4 + 2 * ATOM_CMD_INIT)) return 1; - ret = amdgpu_atom_execute_table(ctx, ATOM_CMD_INIT, ps); + ret = amdgpu_atom_execute_table(ctx, ATOM_CMD_INIT, ps, 16); if (ret) return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/atom.h b/drivers/gpu/drm/amd/amdgpu/atom.h index c11cf18a0f..b807f6639a 100644 --- a/drivers/gpu/drm/amd/amdgpu/atom.h +++ b/drivers/gpu/drm/amd/amdgpu/atom.h @@ -156,7 +156,7 @@ struct atom_context { extern int amdgpu_atom_debug; struct atom_context *amdgpu_atom_parse(struct card_info *card, void *bios); -int amdgpu_atom_execute_table(struct atom_context *ctx, int index, uint32_t *params); +int amdgpu_atom_execute_table(struct atom_context *ctx, int index, uint32_t *params, int params_size); int amdgpu_atom_asic_init(struct atom_context *ctx); void amdgpu_atom_destroy(struct atom_context *ctx); bool amdgpu_atom_parse_data_header(struct atom_context *ctx, int index, uint16_t *size, diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c b/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c index 10098fdd33..3dfc28840a 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c @@ -77,7 +77,7 @@ void amdgpu_atombios_crtc_overscan_setup(struct drm_crtc *crtc, args.usOverscanTop = cpu_to_le16(amdgpu_crtc->v_border); break; } - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); } void amdgpu_atombios_crtc_scaler_setup(struct drm_crtc *crtc) @@ -106,7 +106,7 @@ void amdgpu_atombios_crtc_scaler_setup(struct drm_crtc *crtc) args.ucEnable = ATOM_SCALER_DISABLE; break; } - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); } void amdgpu_atombios_crtc_lock(struct drm_crtc *crtc, int lock) @@ -123,7 +123,7 @@ void amdgpu_atombios_crtc_lock(struct drm_crtc *crtc, int lock) args.ucCRTC = amdgpu_crtc->crtc_id; args.ucEnable = lock; - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); } void amdgpu_atombios_crtc_enable(struct drm_crtc *crtc, int state) @@ -139,7 +139,7 @@ void amdgpu_atombios_crtc_enable(struct drm_crtc *crtc, int state) args.ucCRTC = amdgpu_crtc->crtc_id; args.ucEnable = state; - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); } void amdgpu_atombios_crtc_blank(struct drm_crtc *crtc, int state) @@ -155,7 +155,7 @@ void amdgpu_atombios_crtc_blank(struct drm_crtc *crtc, int state) args.ucCRTC = amdgpu_crtc->crtc_id; args.ucBlanking = state; - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); } void amdgpu_atombios_crtc_powergate(struct drm_crtc *crtc, int state) @@ -171,7 +171,7 @@ void amdgpu_atombios_crtc_powergate(struct drm_crtc *crtc, int state) args.ucDispPipeId = amdgpu_crtc->crtc_id; args.ucEnable = state; - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); } void amdgpu_atombios_crtc_powergate_init(struct amdgpu_device *adev) @@ -183,7 +183,7 @@ void amdgpu_atombios_crtc_powergate_init(struct amdgpu_device *adev) args.ucEnable = ATOM_INIT; - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); } void amdgpu_atombios_crtc_set_dtd_timing(struct drm_crtc *crtc, @@ -228,7 +228,7 @@ void amdgpu_atombios_crtc_set_dtd_timing(struct drm_crtc *crtc, args.susModeMiscInfo.usAccess = cpu_to_le16(misc); args.ucCRTC = amdgpu_crtc->crtc_id; - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); } union atom_enable_ss { @@ -293,7 +293,7 @@ static void amdgpu_atombios_crtc_program_ss(struct amdgpu_device *adev, args.v3.usSpreadSpectrumStep = cpu_to_le16(ss->step); args.v3.ucEnable = enable; - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); } union adjust_pixel_clock { @@ -395,7 +395,7 @@ static u32 amdgpu_atombios_crtc_adjust_pll(struct drm_crtc *crtc, ADJUST_DISPLAY_CONFIG_SS_ENABLE; amdgpu_atom_execute_table(adev->mode_info.atom_context, - index, (uint32_t *)&args); + index, (uint32_t *)&args, sizeof(args)); adjusted_clock = le16_to_cpu(args.v1.usPixelClock) * 10; break; case 3: @@ -428,7 +428,7 @@ static u32 amdgpu_atombios_crtc_adjust_pll(struct drm_crtc *crtc, args.v3.sInput.ucExtTransmitterID = 0; amdgpu_atom_execute_table(adev->mode_info.atom_context, - index, (uint32_t *)&args); + index, (uint32_t *)&args, sizeof(args)); adjusted_clock = le32_to_cpu(args.v3.sOutput.ulDispPllFreq) * 10; if (args.v3.sOutput.ucRefDiv) { amdgpu_crtc->pll_flags |= AMDGPU_PLL_USE_FRAC_FB_DIV; @@ -514,7 +514,7 @@ void amdgpu_atombios_crtc_set_disp_eng_pll(struct amdgpu_device *adev, DRM_ERROR("Unknown table version %d %d\n", frev, crev); return; } - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); } union set_dce_clock { @@ -544,7 +544,7 @@ u32 amdgpu_atombios_crtc_set_dce_clock(struct amdgpu_device *adev, args.v2_1.asParam.ulDCEClkFreq = cpu_to_le32(freq); /* 10kHz units */ args.v2_1.asParam.ucDCEClkType = clk_type; args.v2_1.asParam.ucDCEClkSrc = clk_src; - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); ret_freq = le32_to_cpu(args.v2_1.asParam.ulDCEClkFreq) * 10; break; default: @@ -740,7 +740,7 @@ void amdgpu_atombios_crtc_program_pll(struct drm_crtc *crtc, return; } - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); } int amdgpu_atombios_crtc_prepare_pll(struct drm_crtc *crtc, diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c index 87c41e0e9b..622634c08c 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c @@ -83,7 +83,7 @@ static int amdgpu_atombios_dp_process_aux_ch(struct amdgpu_i2c_chan *chan, args.v2.ucDelay = delay / 10; args.v2.ucHPD_ID = chan->rec.hpd; - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); *ack = args.v2.ucReplyStatus; @@ -301,7 +301,7 @@ static u8 amdgpu_atombios_dp_encoder_service(struct amdgpu_device *adev, args.ucLaneNum = lane_num; args.ucStatus = 0; - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); return args.ucStatus; } diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c index 7672abe6c1..25feab188d 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c @@ -335,7 +335,7 @@ amdgpu_atombios_encoder_setup_dac(struct drm_encoder *encoder, int action) args.ucDacStandard = ATOM_DAC1_PS2; args.usPixelClock = cpu_to_le16(amdgpu_encoder->pixel_clock / 10); - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); } @@ -432,7 +432,7 @@ amdgpu_atombios_encoder_setup_dvo(struct drm_encoder *encoder, int action) break; } - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); } int amdgpu_atombios_encoder_get_encoder_mode(struct drm_encoder *encoder) @@ -732,7 +732,7 @@ amdgpu_atombios_encoder_setup_dig_encoder(struct drm_encoder *encoder, break; } - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); } @@ -1136,7 +1136,7 @@ amdgpu_atombios_encoder_setup_dig_transmitter(struct drm_encoder *encoder, int a break; } - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); } bool @@ -1164,7 +1164,7 @@ amdgpu_atombios_encoder_set_edp_panel_power(struct drm_connector *connector, args.v1.ucAction = action; - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); /* wait for the panel to power up */ if (action == ATOM_TRANSMITTER_ACTION_POWER_ON) { @@ -1288,7 +1288,7 @@ amdgpu_atombios_encoder_setup_external_encoder(struct drm_encoder *encoder, DRM_ERROR("Unknown table version: %d, %d\n", frev, crev); return; } - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); } static void @@ -1633,7 +1633,7 @@ amdgpu_atombios_encoder_set_crtc_source(struct drm_encoder *encoder) return; } - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); } /* This only needs to be called once at startup */ @@ -1706,7 +1706,7 @@ amdgpu_atombios_encoder_dac_load_detect(struct drm_encoder *encoder, args.sDacload.ucMisc = DAC_LOAD_MISC_YPrPb; } - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); return true; } else diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c b/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c index af0335535f..a650111432 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c @@ -86,7 +86,7 @@ static int amdgpu_atombios_i2c_process_i2c_ch(struct amdgpu_i2c_chan *chan, args.ucSlaveAddr = slave_addr << 1; args.ucLineNumber = chan->rec.i2c_id; - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); /* error */ if (args.ucStatus != HW_ASSISTED_I2C_STATUS_SUCCESS) { @@ -172,5 +172,5 @@ void amdgpu_atombios_i2c_channel_trans(struct amdgpu_device *adev, u8 slave_addr args.ucSlaveAddr = slave_addr; args.ucLineNumber = line_number; - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); } diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index 4dfaa017cf..a3a643254d 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -1638,28 +1638,18 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev) PCI_EXP_LNKCTL_HAWD); /* linkctl2 */ - pcie_capability_read_word(root, PCI_EXP_LNKCTL2, - &tmp16); - tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP | - PCI_EXP_LNKCTL2_TX_MARGIN); - tmp16 |= (bridge_cfg2 & - (PCI_EXP_LNKCTL2_ENTER_COMP | - PCI_EXP_LNKCTL2_TX_MARGIN)); - pcie_capability_write_word(root, - PCI_EXP_LNKCTL2, - tmp16); - - pcie_capability_read_word(adev->pdev, - PCI_EXP_LNKCTL2, - &tmp16); - tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP | - PCI_EXP_LNKCTL2_TX_MARGIN); - tmp16 |= (gpu_cfg2 & - (PCI_EXP_LNKCTL2_ENTER_COMP | - PCI_EXP_LNKCTL2_TX_MARGIN)); - pcie_capability_write_word(adev->pdev, - PCI_EXP_LNKCTL2, - tmp16); + pcie_capability_clear_and_set_word(root, PCI_EXP_LNKCTL2, + PCI_EXP_LNKCTL2_ENTER_COMP | + PCI_EXP_LNKCTL2_TX_MARGIN, + bridge_cfg2 & + (PCI_EXP_LNKCTL2_ENTER_COMP | + PCI_EXP_LNKCTL2_TX_MARGIN)); + pcie_capability_clear_and_set_word(adev->pdev, PCI_EXP_LNKCTL2, + PCI_EXP_LNKCTL2_ENTER_COMP | + PCI_EXP_LNKCTL2_TX_MARGIN, + gpu_cfg2 & + (PCI_EXP_LNKCTL2_ENTER_COMP | + PCI_EXP_LNKCTL2_TX_MARGIN)); tmp = RREG32_PCIE(ixPCIE_LC_CNTL4); tmp &= ~PCIE_LC_CNTL4__LC_SET_QUIESCE_MASK; @@ -1674,16 +1664,15 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev) speed_cntl &= ~PCIE_LC_SPEED_CNTL__LC_FORCE_DIS_SW_SPEED_CHANGE_MASK; WREG32_PCIE(ixPCIE_LC_SPEED_CNTL, speed_cntl); - pcie_capability_read_word(adev->pdev, PCI_EXP_LNKCTL2, &tmp16); - tmp16 &= ~PCI_EXP_LNKCTL2_TLS; - + tmp16 = 0; if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) tmp16 |= PCI_EXP_LNKCTL2_TLS_8_0GT; /* gen3 */ else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2) tmp16 |= PCI_EXP_LNKCTL2_TLS_5_0GT; /* gen2 */ else tmp16 |= PCI_EXP_LNKCTL2_TLS_2_5GT; /* gen1 */ - pcie_capability_write_word(adev->pdev, PCI_EXP_LNKCTL2, tmp16); + pcie_capability_clear_and_set_word(adev->pdev, PCI_EXP_LNKCTL2, + PCI_EXP_LNKCTL2_TLS, tmp16); speed_cntl = RREG32_PCIE(ixPCIE_LC_SPEED_CNTL); speed_cntl |= PCIE_LC_SPEED_CNTL__LC_INITIATE_LINK_SPEED_CHANGE_MASK; diff --git a/drivers/gpu/drm/amd/amdgpu/clearstate_gfx9.h b/drivers/gpu/drm/amd/amdgpu/clearstate_gfx9.h index 567a904804..9c85ca6358 100644 --- a/drivers/gpu/drm/amd/amdgpu/clearstate_gfx9.h +++ b/drivers/gpu/drm/amd/amdgpu/clearstate_gfx9.h @@ -21,8 +21,7 @@ * */ -static const unsigned int gfx9_SECT_CONTEXT_def_1[] = -{ +static const unsigned int gfx9_SECT_CONTEXT_def_1[] = { 0x00000000, // DB_RENDER_CONTROL 0x00000000, // DB_COUNT_CONTROL 0x00000000, // DB_DEPTH_VIEW @@ -236,8 +235,7 @@ static const unsigned int gfx9_SECT_CONTEXT_def_1[] = 0x00000000, // PA_SC_VPORT_ZMIN_15 0x3f800000, // PA_SC_VPORT_ZMAX_15 }; -static const unsigned int gfx9_SECT_CONTEXT_def_2[] = -{ +static const unsigned int gfx9_SECT_CONTEXT_def_2[] = { 0x00000000, // PA_SC_SCREEN_EXTENT_CONTROL 0x00000000, // PA_SC_TILE_STEERING_OVERRIDE 0x00000000, // CP_PERFMON_CNTX_CNTL @@ -521,15 +519,13 @@ static const unsigned int gfx9_SECT_CONTEXT_def_2[] = 0x00000000, // CB_MRT6_EPITCH 0x00000000, // CB_MRT7_EPITCH }; -static const unsigned int gfx9_SECT_CONTEXT_def_3[] = -{ +static const unsigned int gfx9_SECT_CONTEXT_def_3[] = { 0x00000000, // PA_CL_POINT_X_RAD 0x00000000, // PA_CL_POINT_Y_RAD 0x00000000, // PA_CL_POINT_SIZE 0x00000000, // PA_CL_POINT_CULL_RAD }; -static const unsigned int gfx9_SECT_CONTEXT_def_4[] = -{ +static const unsigned int gfx9_SECT_CONTEXT_def_4[] = { 0x00000000, // DB_DEPTH_CONTROL 0x00000000, // DB_EQAA 0x00000000, // CB_COLOR_CONTROL @@ -688,17 +684,14 @@ static const unsigned int gfx9_SECT_CONTEXT_def_4[] = 0x00000000, // VGT_GS_OUT_PRIM_TYPE 0x00000000, // IA_ENHANCE }; -static const unsigned int gfx9_SECT_CONTEXT_def_5[] = -{ +static const unsigned int gfx9_SECT_CONTEXT_def_5[] = { 0x00000000, // WD_ENHANCE 0x00000000, // VGT_PRIMITIVEID_EN }; -static const unsigned int gfx9_SECT_CONTEXT_def_6[] = -{ +static const unsigned int gfx9_SECT_CONTEXT_def_6[] = { 0x00000000, // VGT_PRIMITIVEID_RESET }; -static const unsigned int gfx9_SECT_CONTEXT_def_7[] = -{ +static const unsigned int gfx9_SECT_CONTEXT_def_7[] = { 0x00000000, // VGT_GS_MAX_PRIMS_PER_SUBGROUP 0x00000000, // VGT_DRAW_PAYLOAD_CNTL 0, // HOLE @@ -766,8 +759,7 @@ static const unsigned int gfx9_SECT_CONTEXT_def_7[] = 0x00000000, // VGT_STRMOUT_CONFIG 0x00000000, // VGT_STRMOUT_BUFFER_CONFIG }; -static const unsigned int gfx9_SECT_CONTEXT_def_8[] = -{ +static const unsigned int gfx9_SECT_CONTEXT_def_8[] = { 0x00000000, // PA_SC_CENTROID_PRIORITY_0 0x00000000, // PA_SC_CENTROID_PRIORITY_1 0x00001000, // PA_SC_LINE_CNTL @@ -924,8 +916,7 @@ static const unsigned int gfx9_SECT_CONTEXT_def_8[] = 0x00000000, // CB_COLOR7_DCC_BASE 0x00000000, // CB_COLOR7_DCC_BASE_EXT }; -static const struct cs_extent_def gfx9_SECT_CONTEXT_defs[] = -{ +static const struct cs_extent_def gfx9_SECT_CONTEXT_defs[] = { {gfx9_SECT_CONTEXT_def_1, 0x0000a000, 212 }, {gfx9_SECT_CONTEXT_def_2, 0x0000a0d6, 282 }, {gfx9_SECT_CONTEXT_def_3, 0x0000a1f5, 4 }, diff --git a/drivers/gpu/drm/amd/amdgpu/clearstate_si.h b/drivers/gpu/drm/amd/amdgpu/clearstate_si.h index 66e39cdb5c..5fd96ddd7f 100644 --- a/drivers/gpu/drm/amd/amdgpu/clearstate_si.h +++ b/drivers/gpu/drm/amd/amdgpu/clearstate_si.h @@ -21,8 +21,7 @@ * */ -static const u32 si_SECT_CONTEXT_def_1[] = -{ +static const u32 si_SECT_CONTEXT_def_1[] = { 0x00000000, // DB_RENDER_CONTROL 0x00000000, // DB_COUNT_CONTROL 0x00000000, // DB_DEPTH_VIEW @@ -236,8 +235,7 @@ static const u32 si_SECT_CONTEXT_def_1[] = 0x00000000, // PA_SC_VPORT_ZMIN_15 0x3f800000, // PA_SC_VPORT_ZMAX_15 }; -static const u32 si_SECT_CONTEXT_def_2[] = -{ +static const u32 si_SECT_CONTEXT_def_2[] = { 0x00000000, // CP_PERFMON_CNTX_CNTL 0x00000000, // CP_RINGID 0x00000000, // CP_VMID @@ -511,8 +509,7 @@ static const u32 si_SECT_CONTEXT_def_2[] = 0x00000000, // CB_BLEND6_CONTROL 0x00000000, // CB_BLEND7_CONTROL }; -static const u32 si_SECT_CONTEXT_def_3[] = -{ +static const u32 si_SECT_CONTEXT_def_3[] = { 0x00000000, // PA_CL_POINT_X_RAD 0x00000000, // PA_CL_POINT_Y_RAD 0x00000000, // PA_CL_POINT_SIZE @@ -520,8 +517,7 @@ static const u32 si_SECT_CONTEXT_def_3[] = 0x00000000, // VGT_DMA_BASE_HI 0x00000000, // VGT_DMA_BASE }; -static const u32 si_SECT_CONTEXT_def_4[] = -{ +static const u32 si_SECT_CONTEXT_def_4[] = { 0x00000000, // DB_DEPTH_CONTROL 0x00000000, // DB_EQAA 0x00000000, // CB_COLOR_CONTROL @@ -680,16 +676,13 @@ static const u32 si_SECT_CONTEXT_def_4[] = 0x00000000, // VGT_GS_OUT_PRIM_TYPE 0x00000000, // IA_ENHANCE }; -static const u32 si_SECT_CONTEXT_def_5[] = -{ +static const u32 si_SECT_CONTEXT_def_5[] = { 0x00000000, // VGT_PRIMITIVEID_EN }; -static const u32 si_SECT_CONTEXT_def_6[] = -{ +static const u32 si_SECT_CONTEXT_def_6[] = { 0x00000000, // VGT_PRIMITIVEID_RESET }; -static const u32 si_SECT_CONTEXT_def_7[] = -{ +static const u32 si_SECT_CONTEXT_def_7[] = { 0x00000000, // VGT_MULTI_PRIM_IB_RESET_EN 0, // HOLE 0, // HOLE @@ -924,8 +917,7 @@ static const u32 si_SECT_CONTEXT_def_7[] = 0x00000000, // CB_COLOR7_CLEAR_WORD0 0x00000000, // CB_COLOR7_CLEAR_WORD1 }; -static const struct cs_extent_def si_SECT_CONTEXT_defs[] = -{ +static const struct cs_extent_def si_SECT_CONTEXT_defs[] = { {si_SECT_CONTEXT_def_1, 0x0000a000, 212 }, {si_SECT_CONTEXT_def_2, 0x0000a0d8, 272 }, {si_SECT_CONTEXT_def_3, 0x0000a1f5, 6 }, diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index 587ee632a3..221af054d8 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -52,6 +52,7 @@ static void dce_v10_0_set_display_funcs(struct amdgpu_device *adev); static void dce_v10_0_set_irq_funcs(struct amdgpu_device *adev); +static void dce_v10_0_hpd_int_ack(struct amdgpu_device *adev, int hpd); static const u32 crtc_offsets[] = { CRTC0_REGISTER_OFFSET, @@ -364,6 +365,7 @@ static void dce_v10_0_hpd_init(struct amdgpu_device *adev) AMDGPU_HPD_DISCONNECT_INT_DELAY_IN_MS); WREG32(mmDC_HPD_TOGGLE_FILT_CNTL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp); + dce_v10_0_hpd_int_ack(adev, amdgpu_connector->hpd.hpd); dce_v10_0_hpd_set_polarity(adev, amdgpu_connector->hpd.hpd); amdgpu_irq_get(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index f22ec27365..69e8b0db6c 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -52,6 +52,7 @@ static void dce_v11_0_set_display_funcs(struct amdgpu_device *adev); static void dce_v11_0_set_irq_funcs(struct amdgpu_device *adev); +static void dce_v11_0_hpd_int_ack(struct amdgpu_device *adev, int hpd); static const u32 crtc_offsets[] = { @@ -388,6 +389,7 @@ static void dce_v11_0_hpd_init(struct amdgpu_device *adev) AMDGPU_HPD_DISCONNECT_INT_DELAY_IN_MS); WREG32(mmDC_HPD_TOGGLE_FILT_CNTL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp); + dce_v11_0_hpd_int_ack(adev, amdgpu_connector->hpd.hpd); dce_v11_0_hpd_set_polarity(adev, amdgpu_connector->hpd.hpd); amdgpu_irq_get(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd); } diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index 4dbe9b3259..60d40201fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -273,6 +273,21 @@ static void dce_v6_0_hpd_set_polarity(struct amdgpu_device *adev, WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], tmp); } +static void dce_v6_0_hpd_int_ack(struct amdgpu_device *adev, + int hpd) +{ + u32 tmp; + + if (hpd >= adev->mode_info.num_hpd) { + DRM_DEBUG("invalid hdp %d\n", hpd); + return; + } + + tmp = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd]); + tmp |= DC_HPD1_INT_CONTROL__DC_HPD1_INT_ACK_MASK; + WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], tmp); +} + /** * dce_v6_0_hpd_init - hpd setup callback. * @@ -312,6 +327,7 @@ static void dce_v6_0_hpd_init(struct amdgpu_device *adev) continue; } + dce_v6_0_hpd_int_ack(adev, amdgpu_connector->hpd.hpd); dce_v6_0_hpd_set_polarity(adev, amdgpu_connector->hpd.hpd); amdgpu_irq_get(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd); } @@ -3089,7 +3105,7 @@ static int dce_v6_0_hpd_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - uint32_t disp_int, mask, tmp; + uint32_t disp_int, mask; unsigned hpd; if (entry->src_data[0] >= adev->mode_info.num_hpd) { @@ -3102,9 +3118,7 @@ static int dce_v6_0_hpd_irq(struct amdgpu_device *adev, mask = interrupt_status_offsets[hpd].hpd; if (disp_int & mask) { - tmp = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd]); - tmp |= DC_HPD1_INT_CONTROL__DC_HPD1_INT_ACK_MASK; - WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], tmp); + dce_v6_0_hpd_int_ack(adev, hpd); schedule_delayed_work(&adev->hotplug_work, 0); DRM_DEBUG("IH: HPD%d\n", hpd + 1); } diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index 05bcce2338..5a5fcc45e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -265,6 +265,21 @@ static void dce_v8_0_hpd_set_polarity(struct amdgpu_device *adev, WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], tmp); } +static void dce_v8_0_hpd_int_ack(struct amdgpu_device *adev, + int hpd) +{ + u32 tmp; + + if (hpd >= adev->mode_info.num_hpd) { + DRM_DEBUG("invalid hdp %d\n", hpd); + return; + } + + tmp = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd]); + tmp |= DC_HPD1_INT_CONTROL__DC_HPD1_INT_ACK_MASK; + WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], tmp); +} + /** * dce_v8_0_hpd_init - hpd setup callback. * @@ -304,6 +319,7 @@ static void dce_v8_0_hpd_init(struct amdgpu_device *adev) continue; } + dce_v8_0_hpd_int_ack(adev, amdgpu_connector->hpd.hpd); dce_v8_0_hpd_set_polarity(adev, amdgpu_connector->hpd.hpd); amdgpu_irq_get(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd); } @@ -3177,7 +3193,7 @@ static int dce_v8_0_hpd_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - uint32_t disp_int, mask, tmp; + uint32_t disp_int, mask; unsigned hpd; if (entry->src_data[0] >= adev->mode_info.num_hpd) { @@ -3190,9 +3206,7 @@ static int dce_v8_0_hpd_irq(struct amdgpu_device *adev, mask = interrupt_status_offsets[hpd].hpd; if (disp_int & mask) { - tmp = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd]); - tmp |= DC_HPD1_INT_CONTROL__DC_HPD1_INT_ACK_MASK; - WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], tmp); + dce_v8_0_hpd_int_ack(adev, hpd); schedule_delayed_work(&adev->hotplug_work, 0); DRM_DEBUG("IH: HPD%d\n", hpd + 1); } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 42392a97da..701146d649 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -3657,6 +3657,9 @@ static void gfx_v10_0_init_spm_golden_registers(struct amdgpu_device *adev) static void gfx_v10_0_init_golden_registers(struct amdgpu_device *adev) { + if (amdgpu_sriov_vf(adev)) + return; + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(10, 1, 10): soc15_program_register_sequence(adev, @@ -4490,7 +4493,7 @@ static int gfx_v10_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, static int gfx_v10_0_sw_init(void *handle) { int i, j, k, r, ring_id = 0; - struct amdgpu_kiq *kiq; + int xcc_id = 0; struct amdgpu_device *adev = (struct amdgpu_device *)handle; switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { @@ -4619,8 +4622,7 @@ static int gfx_v10_0_sw_init(void *handle) return r; } - kiq = &adev->gfx.kiq[0]; - r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq, 0); + r = amdgpu_gfx_kiq_init_ring(adev, xcc_id); if (r) return r; } @@ -4983,7 +4985,8 @@ static void gfx_v10_0_constants_init(struct amdgpu_device *adev) u32 tmp; int i; - WREG32_FIELD15(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); + if (!amdgpu_sriov_vf(adev)) + WREG32_FIELD15(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); gfx_v10_0_setup_rb(adev); gfx_v10_0_get_cu_info(adev, &adev->gfx.cu_info); @@ -7164,7 +7167,7 @@ static int gfx_v10_0_hw_init(void *handle) if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 3, 0)) gfx_v10_3_program_pbb_mode(adev); - if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 3, 0)) + if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 3, 0) && !amdgpu_sriov_vf(adev)) gfx_v10_3_set_power_brake_sequence(adev); return r; @@ -7947,7 +7950,7 @@ static void gfx_v10_0_update_spm_vmid_internal(struct amdgpu_device *adev, WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data); } -static void gfx_v10_0_update_spm_vmid(struct amdgpu_device *adev, unsigned int vmid) +static void gfx_v10_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned int vmid) { amdgpu_gfx_off_ctrl(adev, false); @@ -8543,34 +8546,23 @@ static void gfx_v10_0_ring_emit_cntxcntl(struct amdgpu_ring *ring, amdgpu_ring_write(ring, 0); } -static unsigned int gfx_v10_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) +static unsigned int gfx_v10_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring, + uint64_t addr) { unsigned int ret; amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); - amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); - amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); - amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */ + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + /* discard following DWs if *cond_exec_gpu_addr==0 */ + amdgpu_ring_write(ring, 0); ret = ring->wptr & ring->buf_mask; - amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */ + /* patch dummy value later */ + amdgpu_ring_write(ring, 0); return ret; } -static void gfx_v10_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned int offset) -{ - unsigned int cur; - - BUG_ON(offset > ring->buf_mask); - BUG_ON(ring->ring[offset] != 0x55aa55aa); - - cur = (ring->wptr - 1) & ring->buf_mask; - if (likely(cur > offset)) - ring->ring[offset] = cur - offset; - else - ring->ring[offset] = (ring->buf_mask + 1) - offset + cur; -} - static int gfx_v10_0_ring_preempt_ib(struct amdgpu_ring *ring) { int i, r = 0; @@ -9225,7 +9217,6 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = { .emit_switch_buffer = gfx_v10_0_ring_emit_sb, .emit_cntxcntl = gfx_v10_0_ring_emit_cntxcntl, .init_cond_exec = gfx_v10_0_ring_emit_init_cond_exec, - .patch_cond_exec = gfx_v10_0_ring_emit_patch_cond_exec, .preempt_ib = gfx_v10_0_ring_preempt_ib, .emit_frame_cntl = gfx_v10_0_ring_emit_frame_cntl, .emit_wreg = gfx_v10_0_ring_emit_wreg, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 6a6fc422e4..f00e05aba4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -89,6 +89,10 @@ MODULE_FIRMWARE("amdgpu/gc_11_5_0_pfp.bin"); MODULE_FIRMWARE("amdgpu/gc_11_5_0_me.bin"); MODULE_FIRMWARE("amdgpu/gc_11_5_0_mec.bin"); MODULE_FIRMWARE("amdgpu/gc_11_5_0_rlc.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_1_pfp.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_1_me.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_1_mec.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_1_rlc.bin"); static const struct soc15_reg_golden golden_settings_gc_11_0[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL, 0x20000000, 0x20000000) @@ -727,7 +731,7 @@ static int gfx_v11_0_rlc_init(struct amdgpu_device *adev) /* init spm vmid with 0xf */ if (adev->gfx.rlc.funcs->update_spm_vmid) - adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf); + adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf); return 0; } @@ -907,6 +911,7 @@ static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev) case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 4): case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 1): adev->gfx.config.max_hw_contexts = 8; adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; adev->gfx.config.sc_prim_fifo_size_backend = 0x100; @@ -1329,7 +1334,7 @@ static int gfx_v11_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev) static int gfx_v11_0_sw_init(void *handle) { int i, j, k, r, ring_id = 0; - struct amdgpu_kiq *kiq; + int xcc_id = 0; struct amdgpu_device *adev = (struct amdgpu_device *)handle; switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { @@ -1346,6 +1351,7 @@ static int gfx_v11_0_sw_init(void *handle) case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 4): case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 1): adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 1; adev->gfx.me.num_queue_per_pipe = 1; @@ -1454,8 +1460,7 @@ static int gfx_v11_0_sw_init(void *handle) return r; } - kiq = &adev->gfx.kiq[0]; - r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq, 0); + r = amdgpu_gfx_kiq_init_ring(adev, xcc_id); if (r) return r; } @@ -2588,7 +2593,8 @@ static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev) IP_VERSION(11, 0, 1) || amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 4) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 0)) + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 0) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 1)) bootload_status = RREG32_SOC15(GC, 0, regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1); else @@ -5027,7 +5033,7 @@ static int gfx_v11_0_update_gfx_clock_gating(struct amdgpu_device *adev, return 0; } -static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid) +static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned vmid) { u32 data; @@ -5041,6 +5047,14 @@ static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid) WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data); amdgpu_gfx_off_ctrl(adev, true); + + if (ring + && amdgpu_sriov_is_pp_one_vf(adev) + && ((ring->funcs->type == AMDGPU_RING_TYPE_GFX) + || (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE))) { + uint32_t reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL); + amdgpu_ring_emit_wreg(ring, reg, data); + } } static const struct amdgpu_rlc_funcs gfx_v11_0_rlc_funcs = { @@ -5074,6 +5088,7 @@ static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable) case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 4): case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 1): WREG32_SOC15(GC, 0, regRLC_PG_DELAY_3, RLC_PG_DELAY_3_DEFAULT_GC_11_0_1); break; default: @@ -5109,6 +5124,7 @@ static int gfx_v11_0_set_powergating_state(void *handle, case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 4): case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 1): if (!enable) amdgpu_gfx_off_ctrl(adev, false); @@ -5140,6 +5156,7 @@ static int gfx_v11_0_set_clockgating_state(void *handle, case IP_VERSION(11, 0, 3): case IP_VERSION(11, 0, 4): case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 1): gfx_v11_0_update_gfx_clock_gating(adev, state == AMD_CG_STATE_GATE); break; @@ -5444,6 +5461,12 @@ static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring, amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); amdgpu_ring_write(ring, 0x0); } + + /* Make sure that we can't skip the SET_Q_MODE packets when the VM + * changed in any way. + */ + ring->set_q_mode_offs = 0; + ring->set_q_mode_ptr = NULL; } static void gfx_v11_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, @@ -5493,16 +5516,81 @@ static void gfx_v11_0_ring_emit_cntxcntl(struct amdgpu_ring *ring, amdgpu_ring_write(ring, 0); } +static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring, + uint64_t addr) +{ + unsigned ret; + + amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + /* discard following DWs if *cond_exec_gpu_addr==0 */ + amdgpu_ring_write(ring, 0); + ret = ring->wptr & ring->buf_mask; + /* patch dummy value later */ + amdgpu_ring_write(ring, 0); + + return ret; +} + static void gfx_v11_0_ring_emit_gfx_shadow(struct amdgpu_ring *ring, u64 shadow_va, u64 csa_va, u64 gds_va, bool init_shadow, int vmid) { struct amdgpu_device *adev = ring->adev; + unsigned int offs, end; - if (!adev->gfx.cp_gfx_shadow) + if (!adev->gfx.cp_gfx_shadow || !ring->ring_obj) return; + /* + * The logic here isn't easy to understand because we need to keep state + * accross multiple executions of the function as well as between the + * CPU and GPU. The general idea is that the newly written GPU command + * has a condition on the previous one and only executed if really + * necessary. + */ + + /* + * The dw in the NOP controls if the next SET_Q_MODE packet should be + * executed or not. Reserve 64bits just to be on the save side. + */ + amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, 1)); + offs = ring->wptr & ring->buf_mask; + + /* + * We start with skipping the prefix SET_Q_MODE and always executing + * the postfix SET_Q_MODE packet. This is changed below with a + * WRITE_DATA command when the postfix executed. + */ + amdgpu_ring_write(ring, shadow_va ? 1 : 0); + amdgpu_ring_write(ring, 0); + + if (ring->set_q_mode_offs) { + uint64_t addr; + + addr = amdgpu_bo_gpu_offset(ring->ring_obj); + addr += ring->set_q_mode_offs << 2; + end = gfx_v11_0_ring_emit_init_cond_exec(ring, addr); + } + + /* + * When the postfix SET_Q_MODE packet executes we need to make sure that the + * next prefix SET_Q_MODE packet executes as well. + */ + if (!shadow_va) { + uint64_t addr; + + addr = amdgpu_bo_gpu_offset(ring->ring_obj); + addr += offs << 2; + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); + amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM); + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + amdgpu_ring_write(ring, 0x1); + } + amdgpu_ring_write(ring, PACKET3(PACKET3_SET_Q_PREEMPTION_MODE, 7)); amdgpu_ring_write(ring, lower_32_bits(shadow_va)); amdgpu_ring_write(ring, upper_32_bits(shadow_va)); @@ -5514,33 +5602,26 @@ static void gfx_v11_0_ring_emit_gfx_shadow(struct amdgpu_ring *ring, PACKET3_SET_Q_PREEMPTION_MODE_IB_VMID(vmid) : 0); amdgpu_ring_write(ring, init_shadow ? PACKET3_SET_Q_PREEMPTION_MODE_INIT_SHADOW_MEM : 0); -} -static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) -{ - unsigned ret; + if (ring->set_q_mode_offs) + amdgpu_ring_patch_cond_exec(ring, end); - amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); - amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); - amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); - amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */ - ret = ring->wptr & ring->buf_mask; - amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */ + if (shadow_va) { + uint64_t token = shadow_va ^ csa_va ^ gds_va ^ vmid; - return ret; -} + /* + * If the tokens match try to skip the last postfix SET_Q_MODE + * packet to avoid saving/restoring the state all the time. + */ + if (ring->set_q_mode_ptr && ring->set_q_mode_token == token) + *ring->set_q_mode_ptr = 0; -static void gfx_v11_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset) -{ - unsigned cur; - BUG_ON(offset > ring->buf_mask); - BUG_ON(ring->ring[offset] != 0x55aa55aa); + ring->set_q_mode_token = token; + } else { + ring->set_q_mode_ptr = &ring->ring[ring->set_q_mode_offs]; + } - cur = (ring->wptr - 1) & ring->buf_mask; - if (likely(cur > offset)) - ring->ring[offset] = cur - offset; - else - ring->ring[offset] = (ring->buf_mask + 1) - offset + cur; + ring->set_q_mode_offs = offs; } static int gfx_v11_0_ring_preempt_ib(struct amdgpu_ring *ring) @@ -6104,9 +6185,10 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = { .get_rptr = gfx_v11_0_ring_get_rptr_gfx, .get_wptr = gfx_v11_0_ring_get_wptr_gfx, .set_wptr = gfx_v11_0_ring_set_wptr_gfx, - .emit_frame_size = /* totally 242 maximum if 16 IBs */ + .emit_frame_size = /* totally 247 maximum if 16 IBs */ + 5 + /* update_spm_vmid */ 5 + /* COND_EXEC */ - 9 + /* SET_Q_PREEMPTION_MODE */ + 22 + /* SET_Q_PREEMPTION_MODE */ 7 + /* PIPELINE_SYNC */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + @@ -6119,6 +6201,7 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = { 31 + /* DE_META */ 3 + /* CNTX_CTRL */ 5 + /* HDP_INVL */ + 22 + /* SET_Q_PREEMPTION_MODE */ 8 + 8 + /* FENCE x2 */ 8, /* gfx_v11_0_emit_mem_sync */ .emit_ib_size = 4, /* gfx_v11_0_ring_emit_ib_gfx */ @@ -6135,7 +6218,6 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = { .emit_cntxcntl = gfx_v11_0_ring_emit_cntxcntl, .emit_gfx_shadow = gfx_v11_0_ring_emit_gfx_shadow, .init_cond_exec = gfx_v11_0_ring_emit_init_cond_exec, - .patch_cond_exec = gfx_v11_0_ring_emit_patch_cond_exec, .preempt_ib = gfx_v11_0_ring_preempt_ib, .emit_frame_cntl = gfx_v11_0_ring_emit_frame_cntl, .emit_wreg = gfx_v11_0_ring_emit_wreg, @@ -6154,6 +6236,7 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = { .get_wptr = gfx_v11_0_ring_get_wptr_compute, .set_wptr = gfx_v11_0_ring_set_wptr_compute, .emit_frame_size = + 5 + /* update_spm_vmid */ 20 + /* gfx_v11_0_ring_emit_gds_switch */ 7 + /* gfx_v11_0_ring_emit_hdp_flush */ 5 + /* hdp invalidate */ diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c index 26d6286d86..9e7ce1e6bc 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c @@ -69,7 +69,7 @@ static int gfx_v11_0_3_rlc_gc_fed_irq(struct amdgpu_device *adev, amdgpu_ras_interrupt_dispatch(adev, &ih_data); } else { if (adev->virt.ops && adev->virt.ops->ras_poison_handler) - adev->virt.ops->ras_poison_handler(adev); + adev->virt.ops->ras_poison_handler(adev, ras_if->block); else dev_warn(adev->dev, "No ras_poison_handler interface in SRIOV for %s!\n", ras_if->name); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index c2faf6b4c2..86a4865b1a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -3274,7 +3274,7 @@ static int gfx_v7_0_rlc_init(struct amdgpu_device *adev) /* init spm vmid with 0xf */ if (adev->gfx.rlc.funcs->update_spm_vmid) - adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf); + adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf); return 0; } @@ -3500,7 +3500,7 @@ static int gfx_v7_0_rlc_resume(struct amdgpu_device *adev) return 0; } -static void gfx_v7_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid) +static void gfx_v7_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned vmid) { u32 data; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 1943beb135..202ddda57f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -1288,7 +1288,7 @@ static int gfx_v8_0_rlc_init(struct amdgpu_device *adev) /* init spm vmid with 0xf */ if (adev->gfx.rlc.funcs->update_spm_vmid) - adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf); + adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf); return 0; } @@ -1900,8 +1900,8 @@ static void gfx_v8_0_sq_irq_work_func(struct work_struct *work); static int gfx_v8_0_sw_init(void *handle) { int i, j, k, r, ring_id; + int xcc_id = 0; struct amdgpu_ring *ring; - struct amdgpu_kiq *kiq; struct amdgpu_device *adev = (struct amdgpu_device *)handle; switch (adev->asic_type) { @@ -2022,8 +2022,7 @@ static int gfx_v8_0_sw_init(void *handle) return r; } - kiq = &adev->gfx.kiq[0]; - r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq, 0); + r = amdgpu_gfx_kiq_init_ring(adev, xcc_id); if (r) return r; @@ -5579,7 +5578,7 @@ static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id) } } -static void gfx_v8_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid) +static void gfx_v8_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned vmid) { u32 data; @@ -6327,33 +6326,22 @@ static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) amdgpu_ring_write(ring, 0); } -static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) +static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring, + uint64_t addr) { unsigned ret; amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); - amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); - amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); - amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */ + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + /* discard following DWs if *cond_exec_gpu_addr==0 */ + amdgpu_ring_write(ring, 0); ret = ring->wptr & ring->buf_mask; - amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */ + /* patch dummy value later */ + amdgpu_ring_write(ring, 0); return ret; } -static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset) -{ - unsigned cur; - - BUG_ON(offset > ring->buf_mask); - BUG_ON(ring->ring[offset] != 0x55aa55aa); - - cur = (ring->wptr & ring->buf_mask) - 1; - if (likely(cur > offset)) - ring->ring[offset] = cur - offset; - else - ring->ring[offset] = (ring->ring_size >> 2) - offset + cur; -} - static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t reg_val_offs) { @@ -6933,7 +6921,6 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = { .emit_switch_buffer = gfx_v8_ring_emit_sb, .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl, .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec, - .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec, .emit_wreg = gfx_v8_0_ring_emit_wreg, .soft_recovery = gfx_v8_0_ring_soft_recovery, .emit_mem_sync = gfx_v8_0_emit_mem_sync, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 153932c1f6..99dbd23411 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1997,8 +1997,8 @@ static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, static int gfx_v9_0_sw_init(void *handle) { int i, j, k, r, ring_id; + int xcc_id = 0; struct amdgpu_ring *ring; - struct amdgpu_kiq *kiq; struct amdgpu_device *adev = (struct amdgpu_device *)handle; unsigned int hw_prio; @@ -2080,7 +2080,7 @@ static int gfx_v9_0_sw_init(void *handle) ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; /* disable scheduler on the real ring */ - ring->no_scheduler = true; + ring->no_scheduler = adev->gfx.mcbp; ring->vm_hub = AMDGPU_GFXHUB(0); r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, @@ -2090,7 +2090,7 @@ static int gfx_v9_0_sw_init(void *handle) } /* set up the software rings */ - if (adev->gfx.num_gfx_rings) { + if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) { for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) { ring = &adev->gfx.sw_gfx_ring[i]; ring->ring_obj = NULL; @@ -2151,8 +2151,7 @@ static int gfx_v9_0_sw_init(void *handle) return r; } - kiq = &adev->gfx.kiq[0]; - r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq, 0); + r = amdgpu_gfx_kiq_init_ring(adev, xcc_id); if (r) return r; @@ -2181,7 +2180,7 @@ static int gfx_v9_0_sw_fini(void *handle) int i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->gfx.num_gfx_rings) { + if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) { for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) amdgpu_ring_fini(&adev->gfx.sw_gfx_ring[i]); amdgpu_ring_mux_fini(&adev->gfx.muxer); @@ -4902,7 +4901,7 @@ static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev, WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data); } -static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned int vmid) +static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned int vmid) { amdgpu_gfx_off_ctrl(adev, false); @@ -5611,31 +5610,21 @@ static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) amdgpu_ring_write(ring, 0); } -static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) +static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring, + uint64_t addr) { unsigned ret; amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); - amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); - amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); - amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */ + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + /* discard following DWs if *cond_exec_gpu_addr==0 */ + amdgpu_ring_write(ring, 0); ret = ring->wptr & ring->buf_mask; - amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */ + /* patch dummy value later */ + amdgpu_ring_write(ring, 0); return ret; } -static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset) -{ - unsigned cur; - BUG_ON(offset > ring->buf_mask); - BUG_ON(ring->ring[offset] != 0x55aa55aa); - - cur = (ring->wptr - 1) & ring->buf_mask; - if (likely(cur > offset)) - ring->ring[offset] = cur - offset; - else - ring->ring[offset] = (ring->ring_size>>2) - offset + cur; -} - static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t reg_val_offs) { @@ -5910,11 +5899,14 @@ static int gfx_v9_0_eop_irq(struct amdgpu_device *adev, switch (me_id) { case 0: - if (adev->gfx.num_gfx_rings && - !amdgpu_mcbp_handle_trailing_fence_irq(&adev->gfx.muxer)) { - /* Fence signals are handled on the software rings*/ - for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) - amdgpu_fence_process(&adev->gfx.sw_gfx_ring[i]); + if (adev->gfx.num_gfx_rings) { + if (!adev->gfx.mcbp) { + amdgpu_fence_process(&adev->gfx.gfx_ring[0]); + } else if (!amdgpu_mcbp_handle_trailing_fence_irq(&adev->gfx.muxer)) { + /* Fence signals are handled on the software rings*/ + for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) + amdgpu_fence_process(&adev->gfx.sw_gfx_ring[i]); + } } break; case 1: @@ -6909,7 +6901,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { .emit_switch_buffer = gfx_v9_ring_emit_sb, .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl, .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec, - .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec, .preempt_ib = gfx_v9_0_ring_preempt_ib, .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl, .emit_wreg = gfx_v9_0_ring_emit_wreg, @@ -6964,7 +6955,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = { .emit_switch_buffer = gfx_v9_ring_emit_sb, .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl, .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec, - .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec, .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl, .emit_wreg = gfx_v9_0_ring_emit_wreg, .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, @@ -7049,7 +7039,7 @@ static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev) for (i = 0; i < adev->gfx.num_gfx_rings; i++) adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx; - if (adev->gfx.num_gfx_rings) { + if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) { for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) adev->gfx.sw_gfx_ring[i].funcs = &gfx_v9_0_sw_ring_funcs_gfx; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c index bc8416afb6..f53b379d89 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c @@ -970,8 +970,9 @@ static void gfx_v9_4_reset_ras_error_count(struct amdgpu_device *adev) WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_DSM_INDEX, 255); } -static const struct soc15_reg_entry gfx_v9_4_ea_err_status_regs = - { SOC15_REG_ENTRY(GC, 0, mmGCEA_ERR_STATUS), 0, 1, 32 }; +static const struct soc15_reg_entry gfx_v9_4_ea_err_status_regs = { + SOC15_REG_ENTRY(GC, 0, mmGCEA_ERR_STATUS), 0, 1, 32 +}; static void gfx_v9_4_query_ras_error_status(struct amdgpu_device *adev) { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 131cddbdda..d89d6829f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -38,6 +38,7 @@ #include "gfx_v9_4_3.h" #include "amdgpu_xcp.h" +#include "amdgpu_aca.h" MODULE_FIRMWARE("amdgpu/gc_9_4_3_mec.bin"); MODULE_FIRMWARE("amdgpu/gc_9_4_3_rlc.bin"); @@ -48,6 +49,10 @@ MODULE_FIRMWARE("amdgpu/gc_9_4_3_rlc.bin"); #define GOLDEN_GB_ADDR_CONFIG 0x2a114042 #define CP_HQD_PERSISTENT_STATE_DEFAULT 0xbe05301 +#define mmSMNAID_XCD0_MCA_SMU 0x36430400 /* SMN AID XCD0 */ +#define mmSMNAID_XCD1_MCA_SMU 0x38430400 /* SMN AID XCD1 */ +#define mmSMNXCD_XCD0_MCA_SMU 0x40430400 /* SMN XCD XCD0 */ + struct amdgpu_gfx_ras gfx_v9_4_3_ras; static void gfx_v9_4_3_set_ring_funcs(struct amdgpu_device *adev); @@ -426,16 +431,16 @@ out: static int gfx_v9_4_3_init_microcode(struct amdgpu_device *adev) { - const char *chip_name; + char ucode_prefix[15]; int r; - chip_name = "gc_9_4_3"; + amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); - r = gfx_v9_4_3_init_rlc_microcode(adev, chip_name); + r = gfx_v9_4_3_init_rlc_microcode(adev, ucode_prefix); if (r) return r; - r = gfx_v9_4_3_init_cp_compute_microcode(adev, chip_name); + r = gfx_v9_4_3_init_cp_compute_microcode(adev, ucode_prefix); if (r) return r; @@ -675,6 +680,66 @@ static const struct amdgpu_gfx_funcs gfx_v9_4_3_gfx_funcs = { .ih_node_to_logical_xcc = &gfx_v9_4_3_ih_to_xcc_inst, }; +static int gfx_v9_4_3_aca_bank_generate_report(struct aca_handle *handle, + struct aca_bank *bank, enum aca_error_type type, + struct aca_bank_report *report, void *data) +{ + u64 status, misc0; + u32 instlo; + int ret; + + status = bank->regs[ACA_REG_IDX_STATUS]; + if ((type == ACA_ERROR_TYPE_UE && + ACA_REG__STATUS__ERRORCODEEXT(status) == ACA_EXTERROR_CODE_FAULT) || + (type == ACA_ERROR_TYPE_CE && + ACA_REG__STATUS__ERRORCODEEXT(status) == ACA_EXTERROR_CODE_CE)) { + + ret = aca_bank_info_decode(bank, &report->info); + if (ret) + return ret; + + /* NOTE: overwrite info.die_id with xcd id for gfx */ + instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]); + instlo &= GENMASK(31, 1); + report->info.die_id = instlo == mmSMNAID_XCD0_MCA_SMU ? 0 : 1; + + misc0 = bank->regs[ACA_REG_IDX_MISC0]; + report->count[type] = ACA_REG__MISC0__ERRCNT(misc0); + } + + return 0; +} + +static bool gfx_v9_4_3_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, + enum aca_error_type type, void *data) +{ + u32 instlo; + + instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]); + instlo &= GENMASK(31, 1); + switch (instlo) { + case mmSMNAID_XCD0_MCA_SMU: + case mmSMNAID_XCD1_MCA_SMU: + case mmSMNXCD_XCD0_MCA_SMU: + return true; + default: + break; + } + + return false; +} + +static const struct aca_bank_ops gfx_v9_4_3_aca_bank_ops = { + .aca_bank_generate_report = gfx_v9_4_3_aca_bank_generate_report, + .aca_bank_is_valid = gfx_v9_4_3_aca_bank_is_valid, +}; + +static const struct aca_info gfx_v9_4_3_aca_info = { + .hwip = ACA_HWIP_TYPE_SMU, + .mask = ACA_ERROR_UE_MASK | ACA_ERROR_CE_MASK, + .bank_ops = &gfx_v9_4_3_aca_bank_ops, +}; + static int gfx_v9_4_3_gpu_early_init(struct amdgpu_device *adev) { u32 gb_addr_config; @@ -778,7 +843,6 @@ static int gfx_v9_4_3_compute_ring_init(struct amdgpu_device *adev, int ring_id, static int gfx_v9_4_3_sw_init(void *handle) { int i, j, k, r, ring_id, xcc_id, num_xcc; - struct amdgpu_kiq *kiq; struct amdgpu_device *adev = (struct amdgpu_device *)handle; adev->gfx.mec.num_mec = 2; @@ -847,8 +911,7 @@ static int gfx_v9_4_3_sw_init(void *handle) return r; } - kiq = &adev->gfx.kiq[xcc_id]; - r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq, xcc_id); + r = amdgpu_gfx_kiq_init_ring(adev, xcc_id); if (r) return r; @@ -1109,7 +1172,7 @@ static int gfx_v9_4_3_rlc_init(struct amdgpu_device *adev) { /* init spm vmid with 0xf */ if (adev->gfx.rlc.funcs->update_spm_vmid) - adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf); + adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf); return 0; } @@ -1320,7 +1383,7 @@ static int gfx_v9_4_3_rlc_resume(struct amdgpu_device *adev) return 0; } -static void gfx_v9_4_3_update_spm_vmid(struct amdgpu_device *adev, +static void gfx_v9_4_3_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned vmid) { u32 reg, data; @@ -3888,6 +3951,9 @@ static void gfx_v9_4_3_inst_enable_watchdog_timer(struct amdgpu_device *adev, uint32_t i; uint32_t data; + if (amdgpu_sriov_vf(adev)) + return; + data = RREG32_SOC15(GC, GET_INST(GC, 0), regSQ_TIMEOUT_CONFIG); data = REG_SET_FIELD(data, SQ_TIMEOUT_CONFIG, TIMEOUT_FATAL_DISABLE, amdgpu_watchdog_timer.timeout_fatal_disable ? 1 : 0); @@ -4242,9 +4308,32 @@ struct amdgpu_ras_block_hw_ops gfx_v9_4_3_ras_ops = { .reset_ras_error_count = &gfx_v9_4_3_reset_ras_error_count, }; +static int gfx_v9_4_3_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) +{ + int r; + + r = amdgpu_ras_block_late_init(adev, ras_block); + if (r) + return r; + + r = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__GFX, + &gfx_v9_4_3_aca_info, + NULL); + if (r) + goto late_fini; + + return 0; + +late_fini: + amdgpu_ras_block_late_fini(adev, ras_block); + + return r; +} + struct amdgpu_gfx_ras gfx_v9_4_3_ras = { .ras_block = { .hw_ops = &gfx_v9_4_3_ras_ops, + .ras_late_init = &gfx_v9_4_3_ras_late_init, }, .enable_watchdog_timer = &gfx_v9_4_3_enable_watchdog_timer, }; diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c index cd0e8a321e..17509f32f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c @@ -155,6 +155,9 @@ static void gfxhub_v2_1_init_system_aperture_regs(struct amdgpu_device *adev) { uint64_t value; + if (amdgpu_sriov_vf(adev)) + return; + /* Program the AGP BAR */ WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_BASE, 0); WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_BOT, adev->gmc.agp_start >> 24); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 6c51856088..d933e19e0c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -105,7 +105,7 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_vmhub *hub = &adev->vmhub[vmhub_index]; bool retry_fault = !!(entry->src_data[1] & 0x80); bool write_fault = !!(entry->src_data[1] & 0x20); - struct amdgpu_task_info task_info; + struct amdgpu_task_info *task_info; uint32_t status = 0; u64 addr; @@ -157,18 +157,22 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev, if (!printk_ratelimit()) return 0; - memset(&task_info, 0, sizeof(struct amdgpu_task_info)); - amdgpu_vm_get_task_info(adev, entry->pasid, &task_info); - dev_err(adev->dev, - "[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u, for process %s pid %d thread %s pid %d)\n", + "[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n", entry->vmid_src ? "mmhub" : "gfxhub", - entry->src_id, entry->ring_id, entry->vmid, - entry->pasid, task_info.process_name, task_info.tgid, - task_info.task_name, task_info.pid); + entry->src_id, entry->ring_id, entry->vmid, entry->pasid); + task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid); + if (task_info) { + dev_err(adev->dev, + " in process %s pid %d thread %s pid %d\n", + task_info->process_name, task_info->tgid, + task_info->task_name, task_info->pid); + amdgpu_vm_put_task_info(task_info); + } + dev_err(adev->dev, " in page starting at address 0x%016llx from client 0x%x (%s)\n", - addr, entry->client_id, - soc15_ih_clientid_name[entry->client_id]); + addr, entry->client_id, + soc15_ih_clientid_name[entry->client_id]); if (!amdgpu_sriov_vf(adev)) hub->vmhub_funcs->print_l2_protection_fault_status(adev, @@ -262,16 +266,17 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, /* flush hdp cache */ adev->hdp.funcs->flush_hdp(adev, NULL); - /* For SRIOV run time, driver shouldn't access the register through MMIO - * Directly use kiq to do the vm invalidation instead + /* This is necessary for SRIOV as well as for GFXOFF to function + * properly under bare metal */ if (adev->gfx.kiq[0].ring.sched.ready && !adev->enable_mes && (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) { - amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req, - 1 << vmid, GET_INST(GC, 0)); + amdgpu_gmc_fw_reg_write_reg_wait(adev, req, ack, inv_req, + 1 << vmid, GET_INST(GC, 0)); return; } + /* This path is needed before KIQ/MES/GFXOFF are set up */ hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ? GC_HWIP : MMHUB_HWIP; spin_lock(&adev->gmc.invalidate_lock); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index 3f1692194b..527dc917e0 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -126,19 +126,24 @@ static int gmc_v11_0_process_interrupt(struct amdgpu_device *adev, } if (printk_ratelimit()) { - struct amdgpu_task_info task_info; - - memset(&task_info, 0, sizeof(struct amdgpu_task_info)); - amdgpu_vm_get_task_info(adev, entry->pasid, &task_info); + struct amdgpu_task_info *task_info; dev_err(adev->dev, - "[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u, for process %s pid %d thread %s pid %d)\n", + "[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n", entry->vmid_src ? "mmhub" : "gfxhub", - entry->src_id, entry->ring_id, entry->vmid, - entry->pasid, task_info.process_name, task_info.tgid, - task_info.task_name, task_info.pid); + entry->src_id, entry->ring_id, entry->vmid, entry->pasid); + task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid); + if (task_info) { + dev_err(adev->dev, + " in process %s pid %d thread %s pid %d)\n", + task_info->process_name, task_info->tgid, + task_info->task_name, task_info->pid); + amdgpu_vm_put_task_info(task_info); + } + dev_err(adev->dev, " in page starting at address 0x%016llx from client %d\n", - addr, entry->client_id); + addr, entry->client_id); + if (!amdgpu_sriov_vf(adev)) hub->vmhub_funcs->print_l2_protection_fault_status(adev, status); } @@ -223,16 +228,17 @@ static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, /* flush hdp cache */ adev->hdp.funcs->flush_hdp(adev, NULL); - /* For SRIOV run time, driver shouldn't access the register through MMIO - * Directly use kiq to do the vm invalidation instead + /* This is necessary for SRIOV as well as for GFXOFF to function + * properly under bare metal */ if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring.sched.ready) && (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) { - amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req, - 1 << vmid, GET_INST(GC, 0)); + amdgpu_gmc_fw_reg_write_reg_wait(adev, req, ack, inv_req, + 1 << vmid, GET_INST(GC, 0)); return; } + /* This path is needed before KIQ/MES/GFXOFF are set up */ hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ? GC_HWIP : MMHUB_HWIP; spin_lock(&adev->gmc.invalidate_lock); @@ -586,6 +592,7 @@ static void gmc_v11_0_set_gfxhub_funcs(struct amdgpu_device *adev) adev->gfxhub.funcs = &gfxhub_v3_0_3_funcs; break; case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 1): adev->gfxhub.funcs = &gfxhub_v11_5_0_funcs; break; default: @@ -747,6 +754,7 @@ static int gmc_v11_0_sw_init(void *handle) case IP_VERSION(11, 0, 3): case IP_VERSION(11, 0, 4): case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 1): set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask); set_bit(AMDGPU_MMHUB0(0), adev->vmhubs_mask); /* diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index 59d9215e55..23b4786399 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -435,9 +435,10 @@ static void gmc_v6_0_set_prt(struct amdgpu_device *adev, bool enable) WREG32(mmVM_PRT_CNTL, tmp); if (enable) { - uint32_t low = AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT; + uint32_t low = AMDGPU_VA_RESERVED_BOTTOM >> + AMDGPU_GPU_PAGE_SHIFT; uint32_t high = adev->vm_manager.max_pfn - - (AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT); + (AMDGPU_VA_RESERVED_TOP >> AMDGPU_GPU_PAGE_SHIFT); WREG32(mmVM_PRT_APERTURE0_LOW_ADDR, low); WREG32(mmVM_PRT_APERTURE1_LOW_ADDR, low); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 45a2f8e031..3da7b6a2b0 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -563,9 +563,10 @@ static void gmc_v7_0_set_prt(struct amdgpu_device *adev, bool enable) WREG32(mmVM_PRT_CNTL, tmp); if (enable) { - uint32_t low = AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT; + uint32_t low = AMDGPU_VA_RESERVED_BOTTOM >> + AMDGPU_GPU_PAGE_SHIFT; uint32_t high = adev->vm_manager.max_pfn - - (AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT); + (AMDGPU_VA_RESERVED_TOP >> AMDGPU_GPU_PAGE_SHIFT); WREG32(mmVM_PRT_APERTURE0_LOW_ADDR, low); WREG32(mmVM_PRT_APERTURE1_LOW_ADDR, low); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 4422b27a3c..d20e5f20ee 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -777,9 +777,10 @@ static void gmc_v8_0_set_prt(struct amdgpu_device *adev, bool enable) WREG32(mmVM_PRT_CNTL, tmp); if (enable) { - uint32_t low = AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT; + uint32_t low = AMDGPU_VA_RESERVED_BOTTOM >> + AMDGPU_GPU_PAGE_SHIFT; uint32_t high = adev->vm_manager.max_pfn - - (AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT); + (AMDGPU_VA_RESERVED_TOP >> AMDGPU_GPU_PAGE_SHIFT); WREG32(mmVM_PRT_APERTURE0_LOW_ADDR, low); WREG32(mmVM_PRT_APERTURE1_LOW_ADDR, low); @@ -1444,18 +1445,24 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev, gmc_v8_0_set_fault_enable_default(adev, false); if (printk_ratelimit()) { - struct amdgpu_task_info task_info; + struct amdgpu_task_info *task_info; - memset(&task_info, 0, sizeof(struct amdgpu_task_info)); - amdgpu_vm_get_task_info(adev, entry->pasid, &task_info); + dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n", + entry->src_id, entry->src_data[0]); + + task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid); + if (task_info) { + dev_err(adev->dev, " for process %s pid %d thread %s pid %d\n", + task_info->process_name, task_info->tgid, + task_info->task_name, task_info->pid); + amdgpu_vm_put_task_info(task_info); + } - dev_err(adev->dev, "GPU fault detected: %d 0x%08x for process %s pid %d thread %s pid %d\n", - entry->src_id, entry->src_data[0], task_info.process_name, - task_info.tgid, task_info.task_name, task_info.pid); dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n", - addr); + addr); dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n", status); + gmc_v8_0_vm_decode_fault(adev, status, addr, mc_client, entry->pasid); } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index e67a62db9e..47b63a4ce6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -496,14 +496,14 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev, if (j >= AMDGPU_MMHUB0(0)) tmp = RREG32_SOC15_IP(MMHUB, reg); else - tmp = RREG32_SOC15_IP(GC, reg); + tmp = RREG32_XCC(reg, j); tmp &= ~bits; if (j >= AMDGPU_MMHUB0(0)) WREG32_SOC15_IP(MMHUB, reg, tmp); else - WREG32_SOC15_IP(GC, reg, tmp); + WREG32_XCC(reg, tmp, j); } } break; @@ -524,14 +524,14 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev, if (j >= AMDGPU_MMHUB0(0)) tmp = RREG32_SOC15_IP(MMHUB, reg); else - tmp = RREG32_SOC15_IP(GC, reg); + tmp = RREG32_XCC(reg, j); tmp |= bits; if (j >= AMDGPU_MMHUB0(0)) WREG32_SOC15_IP(MMHUB, reg, tmp); else - WREG32_SOC15_IP(GC, reg, tmp); + WREG32_XCC(reg, tmp, j); } } break; @@ -549,7 +549,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, bool retry_fault = !!(entry->src_data[1] & 0x80); bool write_fault = !!(entry->src_data[1] & 0x20); uint32_t status = 0, cid = 0, rw = 0; - struct amdgpu_task_info task_info; + struct amdgpu_task_info *task_info; struct amdgpu_vmhub *hub; const char *mmhub_cid; const char *hub_name; @@ -626,15 +626,20 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, if (!printk_ratelimit()) return 0; - memset(&task_info, 0, sizeof(struct amdgpu_task_info)); - amdgpu_vm_get_task_info(adev, entry->pasid, &task_info); - dev_err(adev->dev, - "[%s] %s page fault (src_id:%u ring:%u vmid:%u pasid:%u, for process %s pid %d thread %s pid %d)\n", - hub_name, retry_fault ? "retry" : "no-retry", - entry->src_id, entry->ring_id, entry->vmid, - entry->pasid, task_info.process_name, task_info.tgid, - task_info.task_name, task_info.pid); + "[%s] %s page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n", hub_name, + retry_fault ? "retry" : "no-retry", + entry->src_id, entry->ring_id, entry->vmid, entry->pasid); + + task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid); + if (task_info) { + dev_err(adev->dev, + " for process %s pid %d thread %s pid %d)\n", + task_info->process_name, task_info->tgid, + task_info->task_name, task_info->pid); + amdgpu_vm_put_task_info(task_info); + } + dev_err(adev->dev, " in page starting at address 0x%016llx from IH client 0x%x (%s)\n", addr, entry->client_id, soc15_ih_clientid_name[entry->client_id]); @@ -829,23 +834,25 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, req = hub->vm_inv_eng0_req + hub->eng_distance * eng; ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng; - /* This is necessary for a HW workaround under SRIOV as well - * as GFXOFF under bare metal - */ if (vmhub >= AMDGPU_MMHUB0(0)) inst = GET_INST(GC, 0); else inst = vmhub; + + /* This is necessary for SRIOV as well as for GFXOFF to function + * properly under bare metal + */ if (adev->gfx.kiq[inst].ring.sched.ready && (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) { uint32_t req = hub->vm_inv_eng0_req + hub->eng_distance * eng; uint32_t ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng; - amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req, - 1 << vmid, inst); + amdgpu_gmc_fw_reg_write_reg_wait(adev, req, ack, inv_req, + 1 << vmid, inst); return; } + /* This path is needed before KIQ/MES/GFXOFF are set up */ spin_lock(&adev->gmc.invalidate_lock); /* diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c new file mode 100644 index 0000000000..8d7d0813e3 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c @@ -0,0 +1,142 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "amdgpu_atombios.h" +#include "hdp_v7_0.h" + +#include "hdp/hdp_7_0_0_offset.h" +#include "hdp/hdp_7_0_0_sh_mask.h" +#include <uapi/linux/kfd_ioctl.h> + +static void hdp_v7_0_flush_hdp(struct amdgpu_device *adev, + struct amdgpu_ring *ring) +{ + if (!ring || !ring->funcs->emit_wreg) + WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); + else + amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); +} + +static void hdp_v7_0_update_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t hdp_clk_cntl, hdp_clk_cntl1; + uint32_t hdp_mem_pwr_cntl; + + if (!(adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_HDP_DS | + AMD_CG_SUPPORT_HDP_SD))) + return; + + hdp_clk_cntl = hdp_clk_cntl1 = RREG32_SOC15(HDP, 0,regHDP_CLK_CNTL); + hdp_mem_pwr_cntl = RREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL); + + /* Before doing clock/power mode switch, + * forced on IPH & RC clock */ + hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL, + RC_MEM_CLK_SOFT_OVERRIDE, 1); + WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl); + + /* disable clock and power gating before any changing */ + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + ATOMIC_MEM_POWER_CTRL_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + ATOMIC_MEM_POWER_LS_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + ATOMIC_MEM_POWER_DS_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + ATOMIC_MEM_POWER_SD_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + RC_MEM_POWER_CTRL_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + RC_MEM_POWER_LS_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + RC_MEM_POWER_DS_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + RC_MEM_POWER_SD_EN, 0); + WREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl); + + /* Already disabled above. The actions below are for "enabled" only */ + if (enable) { + /* only one clock gating mode (LS/DS/SD) can be enabled */ + if (adev->cg_flags & AMD_CG_SUPPORT_HDP_SD) { + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + ATOMIC_MEM_POWER_SD_EN, 1); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + RC_MEM_POWER_SD_EN, 1); + } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS) { + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + ATOMIC_MEM_POWER_LS_EN, 1); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + RC_MEM_POWER_LS_EN, 1); + } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_DS) { + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + ATOMIC_MEM_POWER_DS_EN, 1); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + RC_MEM_POWER_DS_EN, 1); + } + + /* confirmed that IPH_MEM_POWER_CTRL_EN and RC_MEM_POWER_CTRL_EN have to + * be set for SRAM LS/DS/SD */ + if (adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS | AMD_CG_SUPPORT_HDP_DS | + AMD_CG_SUPPORT_HDP_SD)) { + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + ATOMIC_MEM_POWER_CTRL_EN, 1); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + RC_MEM_POWER_CTRL_EN, 1); + WREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl); + } + } + + /* disable IPH & RC clock override after clock/power mode changing */ + hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL, + RC_MEM_CLK_SOFT_OVERRIDE, 0); + WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl); +} + +static void hdp_v7_0_get_clockgating_state(struct amdgpu_device *adev, + u64 *flags) +{ + uint32_t tmp; + + /* AMD_CG_SUPPORT_HDP_LS/DS/SD */ + tmp = RREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL); + if (tmp & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_LS_EN_MASK) + *flags |= AMD_CG_SUPPORT_HDP_LS; + else if (tmp & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_DS_EN_MASK) + *flags |= AMD_CG_SUPPORT_HDP_DS; + else if (tmp & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_SD_EN_MASK) + *flags |= AMD_CG_SUPPORT_HDP_SD; +} + +const struct amdgpu_hdp_funcs hdp_v7_0_funcs = { + .flush_hdp = hdp_v7_0_flush_hdp, + .update_clock_gating = hdp_v7_0_update_clock_gating, + .get_clock_gating_state = hdp_v7_0_get_clockgating_state, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.h b/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.h new file mode 100644 index 0000000000..25b6920140 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.h @@ -0,0 +1,31 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __HDP_V7_0_H__ +#define __HDP_V7_0_H__ + +#include "soc15_common.h" + +extern const struct amdgpu_hdp_funcs hdp_v7_0_funcs; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c new file mode 100644 index 0000000000..7aed96fa10 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c @@ -0,0 +1,773 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include <linux/pci.h> + +#include "amdgpu.h" +#include "amdgpu_ih.h" + +#include "oss/osssys_7_0_0_offset.h" +#include "oss/osssys_7_0_0_sh_mask.h" + +#include "soc15_common.h" +#include "ih_v7_0.h" + +#define MAX_REARM_RETRY 10 + +static void ih_v7_0_set_interrupt_funcs(struct amdgpu_device *adev); + +/** + * ih_v7_0_init_register_offset - Initialize register offset for ih rings + * + * @adev: amdgpu_device pointer + * + * Initialize register offset ih rings (IH_V7_0). + */ +static void ih_v7_0_init_register_offset(struct amdgpu_device *adev) +{ + struct amdgpu_ih_regs *ih_regs; + + /* ih ring 2 is removed + * ih ring and ih ring 1 are available */ + if (adev->irq.ih.ring_size) { + ih_regs = &adev->irq.ih.ih_regs; + ih_regs->ih_rb_base = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_BASE); + ih_regs->ih_rb_base_hi = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_BASE_HI); + ih_regs->ih_rb_cntl = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_CNTL); + ih_regs->ih_rb_wptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_WPTR); + ih_regs->ih_rb_rptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_RPTR); + ih_regs->ih_doorbell_rptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_DOORBELL_RPTR); + ih_regs->ih_rb_wptr_addr_lo = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_WPTR_ADDR_LO); + ih_regs->ih_rb_wptr_addr_hi = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_WPTR_ADDR_HI); + ih_regs->psp_reg_id = PSP_REG_IH_RB_CNTL; + } + + if (adev->irq.ih1.ring_size) { + ih_regs = &adev->irq.ih1.ih_regs; + ih_regs->ih_rb_base = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_BASE_RING1); + ih_regs->ih_rb_base_hi = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_BASE_HI_RING1); + ih_regs->ih_rb_cntl = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_CNTL_RING1); + ih_regs->ih_rb_wptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_WPTR_RING1); + ih_regs->ih_rb_rptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_RPTR_RING1); + ih_regs->ih_doorbell_rptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_DOORBELL_RPTR_RING1); + ih_regs->psp_reg_id = PSP_REG_IH_RB_CNTL_RING1; + } +} + +/** + * force_update_wptr_for_self_int - Force update the wptr for self interrupt + * + * @adev: amdgpu_device pointer + * @threshold: threshold to trigger the wptr reporting + * @timeout: timeout to trigger the wptr reporting + * @enabled: Enable/disable timeout flush mechanism + * + * threshold input range: 0 ~ 15, default 0, + * real_threshold = 2^threshold + * timeout input range: 0 ~ 20, default 8, + * real_timeout = (2^timeout) * 1024 / (socclk_freq) + * + * Force update wptr for self interrupt ( >= SIENNA_CICHLID). + */ +static void +force_update_wptr_for_self_int(struct amdgpu_device *adev, + u32 threshold, u32 timeout, bool enabled) +{ + u32 ih_cntl, ih_rb_cntl; + + ih_cntl = RREG32_SOC15(OSSSYS, 0, regIH_CNTL2); + ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, regIH_RB_CNTL_RING1); + + ih_cntl = REG_SET_FIELD(ih_cntl, IH_CNTL2, + SELF_IV_FORCE_WPTR_UPDATE_TIMEOUT, timeout); + ih_cntl = REG_SET_FIELD(ih_cntl, IH_CNTL2, + SELF_IV_FORCE_WPTR_UPDATE_ENABLE, enabled); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1, + RB_USED_INT_THRESHOLD, threshold); + + if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) { + if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1, ih_rb_cntl)) + return; + } else { + WREG32_SOC15(OSSSYS, 0, regIH_RB_CNTL_RING1, ih_rb_cntl); + } + + WREG32_SOC15(OSSSYS, 0, regIH_CNTL2, ih_cntl); +} + +/** + * ih_v7_0_toggle_ring_interrupts - toggle the interrupt ring buffer + * + * @adev: amdgpu_device pointer + * @ih: amdgpu_ih_ring pointet + * @enable: true - enable the interrupts, false - disable the interrupts + * + * Toggle the interrupt ring buffer (IH_V7_0) + */ +static int ih_v7_0_toggle_ring_interrupts(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih, + bool enable) +{ + struct amdgpu_ih_regs *ih_regs; + uint32_t tmp; + + ih_regs = &ih->ih_regs; + + tmp = RREG32(ih_regs->ih_rb_cntl); + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_ENABLE, (enable ? 1 : 0)); + /* enable_intr field is only valid in ring0 */ + if (ih == &adev->irq.ih) + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, ENABLE_INTR, (enable ? 1 : 0)); + + if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) { + if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp)) + return -ETIMEDOUT; + } else { + WREG32(ih_regs->ih_rb_cntl, tmp); + } + + if (enable) { + ih->enabled = true; + } else { + /* set rptr, wptr to 0 */ + WREG32(ih_regs->ih_rb_rptr, 0); + WREG32(ih_regs->ih_rb_wptr, 0); + ih->enabled = false; + ih->rptr = 0; + } + + return 0; +} + +/** + * ih_v7_0_toggle_interrupts - Toggle all the available interrupt ring buffers + * + * @adev: amdgpu_device pointer + * @enable: enable or disable interrupt ring buffers + * + * Toggle all the available interrupt ring buffers (IH_V7_0). + */ +static int ih_v7_0_toggle_interrupts(struct amdgpu_device *adev, bool enable) +{ + struct amdgpu_ih_ring *ih[] = {&adev->irq.ih, &adev->irq.ih1}; + int i; + int r; + + for (i = 0; i < ARRAY_SIZE(ih); i++) { + if (ih[i]->ring_size) { + r = ih_v7_0_toggle_ring_interrupts(adev, ih[i], enable); + if (r) + return r; + } + } + + return 0; +} + +static uint32_t ih_v7_0_rb_cntl(struct amdgpu_ih_ring *ih, uint32_t ih_rb_cntl) +{ + int rb_bufsz = order_base_2(ih->ring_size / 4); + + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, + MC_SPACE, ih->use_bus_addr ? 2 : 4); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, + WPTR_OVERFLOW_CLEAR, 1); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, + WPTR_OVERFLOW_ENABLE, 1); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_SIZE, rb_bufsz); + /* Ring Buffer write pointer writeback. If enabled, IH_RB_WPTR register + * value is written to memory + */ + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, + WPTR_WRITEBACK_ENABLE, 1); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_SNOOP, 1); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_RO, 0); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_VMID, 0); + + return ih_rb_cntl; +} + +static uint32_t ih_v7_0_doorbell_rptr(struct amdgpu_ih_ring *ih) +{ + u32 ih_doorbell_rtpr = 0; + + if (ih->use_doorbell) { + ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr, + IH_DOORBELL_RPTR, OFFSET, + ih->doorbell_index); + ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr, + IH_DOORBELL_RPTR, + ENABLE, 1); + } else { + ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr, + IH_DOORBELL_RPTR, + ENABLE, 0); + } + return ih_doorbell_rtpr; +} + +/** + * ih_v7_0_enable_ring - enable an ih ring buffer + * + * @adev: amdgpu_device pointer + * @ih: amdgpu_ih_ring pointer + * + * Enable an ih ring buffer (IH_V7_0) + */ +static int ih_v7_0_enable_ring(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih) +{ + struct amdgpu_ih_regs *ih_regs; + uint32_t tmp; + + ih_regs = &ih->ih_regs; + + /* Ring Buffer base. [39:8] of 40-bit address of the beginning of the ring buffer*/ + WREG32(ih_regs->ih_rb_base, ih->gpu_addr >> 8); + WREG32(ih_regs->ih_rb_base_hi, (ih->gpu_addr >> 40) & 0xff); + + tmp = RREG32(ih_regs->ih_rb_cntl); + tmp = ih_v7_0_rb_cntl(ih, tmp); + if (ih == &adev->irq.ih) + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RPTR_REARM, !!adev->irq.msi_enabled); + if (ih == &adev->irq.ih1) { + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_ENABLE, 0); + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_FULL_DRAIN_ENABLE, 1); + } + + if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) { + if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp)) { + DRM_ERROR("PSP program IH_RB_CNTL failed!\n"); + return -ETIMEDOUT; + } + } else { + WREG32(ih_regs->ih_rb_cntl, tmp); + } + + if (ih == &adev->irq.ih) { + /* set the ih ring 0 writeback address whether it's enabled or not */ + WREG32(ih_regs->ih_rb_wptr_addr_lo, lower_32_bits(ih->wptr_addr)); + WREG32(ih_regs->ih_rb_wptr_addr_hi, upper_32_bits(ih->wptr_addr) & 0xFFFF); + } + + /* set rptr, wptr to 0 */ + WREG32(ih_regs->ih_rb_wptr, 0); + WREG32(ih_regs->ih_rb_rptr, 0); + + WREG32(ih_regs->ih_doorbell_rptr, ih_v7_0_doorbell_rptr(ih)); + + return 0; +} + +/** + * ih_v7_0_irq_init - init and enable the interrupt ring + * + * @adev: amdgpu_device pointer + * + * Allocate a ring buffer for the interrupt controller, + * enable the RLC, disable interrupts, enable the IH + * ring buffer and enable it. + * Called at device load and reume. + * Returns 0 for success, errors for failure. + */ +static int ih_v7_0_irq_init(struct amdgpu_device *adev) +{ + struct amdgpu_ih_ring *ih[] = {&adev->irq.ih, &adev->irq.ih1}; + u32 ih_chicken; + u32 tmp; + int ret; + int i; + + /* disable irqs */ + ret = ih_v7_0_toggle_interrupts(adev, false); + if (ret) + return ret; + + adev->nbio.funcs->ih_control(adev); + + if (unlikely((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) || + (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO))) { + if (ih[0]->use_bus_addr) { + ih_chicken = RREG32_SOC15(OSSSYS, 0, regIH_CHICKEN); + ih_chicken = REG_SET_FIELD(ih_chicken, + IH_CHICKEN, MC_SPACE_GPA_ENABLE, 1); + WREG32_SOC15(OSSSYS, 0, regIH_CHICKEN, ih_chicken); + } + } + + for (i = 0; i < ARRAY_SIZE(ih); i++) { + if (ih[i]->ring_size) { + ret = ih_v7_0_enable_ring(adev, ih[i]); + if (ret) + return ret; + } + } + + /* update doorbell range for ih ring 0 */ + adev->nbio.funcs->ih_doorbell_range(adev, ih[0]->use_doorbell, + ih[0]->doorbell_index); + + tmp = RREG32_SOC15(OSSSYS, 0, regIH_STORM_CLIENT_LIST_CNTL); + tmp = REG_SET_FIELD(tmp, IH_STORM_CLIENT_LIST_CNTL, + CLIENT18_IS_STORM_CLIENT, 1); + WREG32_SOC15(OSSSYS, 0, regIH_STORM_CLIENT_LIST_CNTL, tmp); + + tmp = RREG32_SOC15(OSSSYS, 0, regIH_INT_FLOOD_CNTL); + tmp = REG_SET_FIELD(tmp, IH_INT_FLOOD_CNTL, FLOOD_CNTL_ENABLE, 1); + WREG32_SOC15(OSSSYS, 0, regIH_INT_FLOOD_CNTL, tmp); + + /* GC/MMHUB UTCL2 page fault interrupts are configured as + * MSI storm capable interrupts by deafult. The delay is + * used to avoid ISR being called too frequently + * when page fault happens on several continuous page + * and thus avoid MSI storm */ + tmp = RREG32_SOC15(OSSSYS, 0, regIH_MSI_STORM_CTRL); + tmp = REG_SET_FIELD(tmp, IH_MSI_STORM_CTRL, + DELAY, 3); + WREG32_SOC15(OSSSYS, 0, regIH_MSI_STORM_CTRL, tmp); + + pci_set_master(adev->pdev); + + /* enable interrupts */ + ret = ih_v7_0_toggle_interrupts(adev, true); + if (ret) + return ret; + /* enable wptr force update for self int */ + force_update_wptr_for_self_int(adev, 0, 8, true); + + if (adev->irq.ih_soft.ring_size) + adev->irq.ih_soft.enabled = true; + + return 0; +} + +/** + * ih_v7_0_irq_disable - disable interrupts + * + * @adev: amdgpu_device pointer + * + * Disable interrupts on the hw. + */ +static void ih_v7_0_irq_disable(struct amdgpu_device *adev) +{ + force_update_wptr_for_self_int(adev, 0, 8, false); + ih_v7_0_toggle_interrupts(adev, false); + + /* Wait and acknowledge irq */ + mdelay(1); +} + +/** + * ih_v7_0_get_wptr() - get the IH ring buffer wptr + * + * @adev: amdgpu_device pointer + * @ih: IH ring buffer to fetch wptr + * + * Get the IH ring buffer wptr from either the register + * or the writeback memory buffer. Also check for + * ring buffer overflow and deal with it. + * Returns the value of the wptr. + */ +static u32 ih_v7_0_get_wptr(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih) +{ + u32 wptr, tmp; + struct amdgpu_ih_regs *ih_regs; + + wptr = le32_to_cpu(*ih->wptr_cpu); + ih_regs = &ih->ih_regs; + + if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) + goto out; + + wptr = RREG32_NO_KIQ(ih_regs->ih_rb_wptr); + if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) + goto out; + wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0); + + /* When a ring buffer overflow happen start parsing interrupt + * from the last not overwritten vector (wptr + 32). Hopefully + * this should allow us to catch up. + */ + tmp = (wptr + 32) & ih->ptr_mask; + dev_warn(adev->dev, "IH ring buffer overflow " + "(0x%08X, 0x%08X, 0x%08X)\n", + wptr, ih->rptr, tmp); + ih->rptr = tmp; + + tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl); + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); + WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); +out: + return (wptr & ih->ptr_mask); +} + +/** + * ih_v7_0_irq_rearm - rearm IRQ if lost + * + * @adev: amdgpu_device pointer + * @ih: IH ring to match + * + */ +static void ih_v7_0_irq_rearm(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih) +{ + uint32_t v = 0; + uint32_t i = 0; + struct amdgpu_ih_regs *ih_regs; + + ih_regs = &ih->ih_regs; + + /* Rearm IRQ / re-write doorbell if doorbell write is lost */ + for (i = 0; i < MAX_REARM_RETRY; i++) { + v = RREG32_NO_KIQ(ih_regs->ih_rb_rptr); + if ((v < ih->ring_size) && (v != ih->rptr)) + WDOORBELL32(ih->doorbell_index, ih->rptr); + else + break; + } +} + +/** + * ih_v7_0_set_rptr - set the IH ring buffer rptr + * + * @adev: amdgpu_device pointer + * @ih: IH ring buffer to set rptr + */ +static void ih_v7_0_set_rptr(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih) +{ + struct amdgpu_ih_regs *ih_regs; + + if (ih->use_doorbell) { + /* XXX check if swapping is necessary on BE */ + *ih->rptr_cpu = ih->rptr; + WDOORBELL32(ih->doorbell_index, ih->rptr); + + if (amdgpu_sriov_vf(adev)) + ih_v7_0_irq_rearm(adev, ih); + } else { + ih_regs = &ih->ih_regs; + WREG32(ih_regs->ih_rb_rptr, ih->rptr); + } +} + +/** + * ih_v7_0_self_irq - dispatch work for ring 1 + * + * @adev: amdgpu_device pointer + * @source: irq source + * @entry: IV with WPTR update + * + * Update the WPTR from the IV and schedule work to handle the entries. + */ +static int ih_v7_0_self_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + uint32_t wptr = cpu_to_le32(entry->src_data[0]); + + switch (entry->ring_id) { + case 1: + *adev->irq.ih1.wptr_cpu = wptr; + schedule_work(&adev->irq.ih1_work); + break; + default: break; + } + return 0; +} + +static const struct amdgpu_irq_src_funcs ih_v7_0_self_irq_funcs = { + .process = ih_v7_0_self_irq, +}; + +static void ih_v7_0_set_self_irq_funcs(struct amdgpu_device *adev) +{ + adev->irq.self_irq.num_types = 0; + adev->irq.self_irq.funcs = &ih_v7_0_self_irq_funcs; +} + +static int ih_v7_0_early_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + ih_v7_0_set_interrupt_funcs(adev); + ih_v7_0_set_self_irq_funcs(adev); + return 0; +} + +static int ih_v7_0_sw_init(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + bool use_bus_addr; + + r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_IH, 0, + &adev->irq.self_irq); + + if (r) + return r; + + /* use gpu virtual address for ih ring + * until ih_checken is programmed to allow + * use bus address for ih ring by psp bl */ + use_bus_addr = + (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) ? false : true; + r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024, use_bus_addr); + if (r) + return r; + + adev->irq.ih.use_doorbell = true; + adev->irq.ih.doorbell_index = adev->doorbell_index.ih << 1; + + adev->irq.ih1.ring_size = 0; + adev->irq.ih2.ring_size = 0; + + /* initialize ih control register offset */ + ih_v7_0_init_register_offset(adev); + + r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, PAGE_SIZE, true); + if (r) + return r; + + r = amdgpu_irq_init(adev); + + return r; +} + +static int ih_v7_0_sw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + amdgpu_irq_fini_sw(adev); + + return 0; +} + +static int ih_v7_0_hw_init(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = ih_v7_0_irq_init(adev); + if (r) + return r; + + return 0; +} + +static int ih_v7_0_hw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + ih_v7_0_irq_disable(adev); + + return 0; +} + +static int ih_v7_0_suspend(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return ih_v7_0_hw_fini(adev); +} + +static int ih_v7_0_resume(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return ih_v7_0_hw_init(adev); +} + +static bool ih_v7_0_is_idle(void *handle) +{ + /* todo */ + return true; +} + +static int ih_v7_0_wait_for_idle(void *handle) +{ + /* todo */ + return -ETIMEDOUT; +} + +static int ih_v7_0_soft_reset(void *handle) +{ + /* todo */ + return 0; +} + +static void ih_v7_0_update_clockgating_state(struct amdgpu_device *adev, + bool enable) +{ + uint32_t data, def, field_val; + + if (adev->cg_flags & AMD_CG_SUPPORT_IH_CG) { + def = data = RREG32_SOC15(OSSSYS, 0, regIH_CLK_CTRL); + field_val = enable ? 0 : 1; + data = REG_SET_FIELD(data, IH_CLK_CTRL, + DBUS_MUX_CLK_SOFT_OVERRIDE, field_val); + data = REG_SET_FIELD(data, IH_CLK_CTRL, + OSSSYS_SHARE_CLK_SOFT_OVERRIDE, field_val); + data = REG_SET_FIELD(data, IH_CLK_CTRL, + LIMIT_SMN_CLK_SOFT_OVERRIDE, field_val); + data = REG_SET_FIELD(data, IH_CLK_CTRL, + DYN_CLK_SOFT_OVERRIDE, field_val); + data = REG_SET_FIELD(data, IH_CLK_CTRL, + REG_CLK_SOFT_OVERRIDE, field_val); + if (def != data) + WREG32_SOC15(OSSSYS, 0, regIH_CLK_CTRL, data); + } + + return; +} + +static int ih_v7_0_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + ih_v7_0_update_clockgating_state(adev, + state == AMD_CG_STATE_GATE); + return 0; +} + +static void ih_v7_0_update_ih_mem_power_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t ih_mem_pwr_cntl; + + /* Disable ih sram power cntl before switch powergating mode */ + ih_mem_pwr_cntl = RREG32_SOC15(OSSSYS, 0, regIH_MEM_POWER_CTRL); + ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL, + IH_BUFFER_MEM_POWER_CTRL_EN, 0); + WREG32_SOC15(OSSSYS, 0, regIH_MEM_POWER_CTRL, ih_mem_pwr_cntl); + + /* It is recommended to set mem powergating mode to DS mode */ + if (enable) { + /* mem power mode */ + ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL, + IH_BUFFER_MEM_POWER_LS_EN, 0); + ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL, + IH_BUFFER_MEM_POWER_DS_EN, 1); + ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL, + IH_BUFFER_MEM_POWER_SD_EN, 0); + /* cam mem power mode */ + ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL, + IH_RETRY_INT_CAM_MEM_POWER_LS_EN, 0); + ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL, + IH_RETRY_INT_CAM_MEM_POWER_DS_EN, 1); + ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL, + IH_RETRY_INT_CAM_MEM_POWER_SD_EN, 0); + /* re-enable power cntl */ + ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL, + IH_BUFFER_MEM_POWER_CTRL_EN, 1); + } else { + /* mem power mode */ + ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL, + IH_BUFFER_MEM_POWER_LS_EN, 0); + ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL, + IH_BUFFER_MEM_POWER_DS_EN, 0); + ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL, + IH_BUFFER_MEM_POWER_SD_EN, 0); + /* cam mem power mode */ + ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL, + IH_RETRY_INT_CAM_MEM_POWER_LS_EN, 0); + ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL, + IH_RETRY_INT_CAM_MEM_POWER_DS_EN, 0); + ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL, + IH_RETRY_INT_CAM_MEM_POWER_SD_EN, 0); + /* re-enable power cntl*/ + ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL, + IH_BUFFER_MEM_POWER_CTRL_EN, 1); + } + + WREG32_SOC15(OSSSYS, 0, regIH_MEM_POWER_CTRL, ih_mem_pwr_cntl); +} + +static int ih_v7_0_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + bool enable = (state == AMD_PG_STATE_GATE); + + if (adev->pg_flags & AMD_PG_SUPPORT_IH_SRAM_PG) + ih_v7_0_update_ih_mem_power_gating(adev, enable); + + return 0; +} + +static void ih_v7_0_get_clockgating_state(void *handle, u64 *flags) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (!RREG32_SOC15(OSSSYS, 0, regIH_CLK_CTRL)) + *flags |= AMD_CG_SUPPORT_IH_CG; + + return; +} + +static const struct amd_ip_funcs ih_v7_0_ip_funcs = { + .name = "ih_v7_0", + .early_init = ih_v7_0_early_init, + .late_init = NULL, + .sw_init = ih_v7_0_sw_init, + .sw_fini = ih_v7_0_sw_fini, + .hw_init = ih_v7_0_hw_init, + .hw_fini = ih_v7_0_hw_fini, + .suspend = ih_v7_0_suspend, + .resume = ih_v7_0_resume, + .is_idle = ih_v7_0_is_idle, + .wait_for_idle = ih_v7_0_wait_for_idle, + .soft_reset = ih_v7_0_soft_reset, + .set_clockgating_state = ih_v7_0_set_clockgating_state, + .set_powergating_state = ih_v7_0_set_powergating_state, + .get_clockgating_state = ih_v7_0_get_clockgating_state, +}; + +static const struct amdgpu_ih_funcs ih_v7_0_funcs = { + .get_wptr = ih_v7_0_get_wptr, + .decode_iv = amdgpu_ih_decode_iv_helper, + .decode_iv_ts = amdgpu_ih_decode_iv_ts_helper, + .set_rptr = ih_v7_0_set_rptr +}; + +static void ih_v7_0_set_interrupt_funcs(struct amdgpu_device *adev) +{ + adev->irq.ih_funcs = &ih_v7_0_funcs; +} + +const struct amdgpu_ip_block_version ih_v7_0_ip_block = +{ + .type = AMD_IP_BLOCK_TYPE_IH, + .major = 7, + .minor = 0, + .rev = 0, + .funcs = &ih_v7_0_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v7_0.h b/drivers/gpu/drm/amd/amdgpu/ih_v7_0.h new file mode 100644 index 0000000000..af9dcbc451 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/ih_v7_0.h @@ -0,0 +1,28 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __IH_V7_0_IH_H__ +#define __IH_V7_0_IH_H__ + +extern const struct amdgpu_ip_block_version ih_v7_0_ip_block; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c index c0bdab3bf0..3e91a8e42c 100644 --- a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c @@ -37,6 +37,7 @@ MODULE_FIRMWARE("amdgpu/gc_11_0_2_imu.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_3_imu.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_4_imu.bin"); MODULE_FIRMWARE("amdgpu/gc_11_5_0_imu.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_1_imu.bin"); static int imu_v11_0_init_microcode(struct amdgpu_device *adev) { diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c index e67a337457..99cd49ee8e 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c @@ -551,7 +551,7 @@ static int jpeg_v2_5_set_powergating_state(void *handle, struct amdgpu_device *adev = (struct amdgpu_device *)handle; int ret; - if(state == adev->jpeg.cur_state) + if (state == adev->jpeg.cur_state) return 0; if (state == AMD_PG_STATE_GATE) @@ -559,7 +559,7 @@ static int jpeg_v2_5_set_powergating_state(void *handle, else ret = jpeg_v2_5_start(adev); - if(!ret) + if (!ret) adev->jpeg.cur_state = state; return ret; @@ -754,8 +754,7 @@ static void jpeg_v2_5_set_irq_funcs(struct amdgpu_device *adev) } } -const struct amdgpu_ip_block_version jpeg_v2_5_ip_block = -{ +const struct amdgpu_ip_block_version jpeg_v2_5_ip_block = { .type = AMD_IP_BLOCK_TYPE_JPEG, .major = 2, .minor = 5, @@ -763,8 +762,7 @@ const struct amdgpu_ip_block_version jpeg_v2_5_ip_block = .funcs = &jpeg_v2_5_ip_funcs, }; -const struct amdgpu_ip_block_version jpeg_v2_6_ip_block = -{ +const struct amdgpu_ip_block_version jpeg_v2_6_ip_block = { .type = AMD_IP_BLOCK_TYPE_JPEG, .major = 2, .minor = 6, diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index 82b6b62c17..32caeb37ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -652,7 +652,7 @@ static void jpeg_v4_0_3_dec_ring_set_wptr(struct amdgpu_ring *ring) * * Write a start command to the ring. */ -static void jpeg_v4_0_3_dec_ring_insert_start(struct amdgpu_ring *ring) +void jpeg_v4_0_3_dec_ring_insert_start(struct amdgpu_ring *ring) { if (!amdgpu_sriov_vf(ring->adev)) { amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, @@ -672,7 +672,7 @@ static void jpeg_v4_0_3_dec_ring_insert_start(struct amdgpu_ring *ring) * * Write a end command to the ring. */ -static void jpeg_v4_0_3_dec_ring_insert_end(struct amdgpu_ring *ring) +void jpeg_v4_0_3_dec_ring_insert_end(struct amdgpu_ring *ring) { if (!amdgpu_sriov_vf(ring->adev)) { amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, @@ -695,7 +695,7 @@ static void jpeg_v4_0_3_dec_ring_insert_end(struct amdgpu_ring *ring) * * Write a fence and a trap command to the ring. */ -static void jpeg_v4_0_3_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, +void jpeg_v4_0_3_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, unsigned int flags) { WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); @@ -764,7 +764,7 @@ static void jpeg_v4_0_3_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, * * Write ring commands to execute the indirect buffer. */ -static void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring, +void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job, struct amdgpu_ib *ib, uint32_t flags) @@ -815,7 +815,7 @@ static void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring, amdgpu_ring_write(ring, 0x2); } -static void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, +void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, uint32_t val, uint32_t mask) { uint32_t reg_offset = (reg << 2); @@ -842,7 +842,7 @@ static void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_ amdgpu_ring_write(ring, mask); } -static void jpeg_v4_0_3_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, +void jpeg_v4_0_3_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned int vmid, uint64_t pd_addr) { struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub]; @@ -857,7 +857,7 @@ static void jpeg_v4_0_3_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, jpeg_v4_0_3_dec_ring_emit_reg_wait(ring, data0, data1, mask); } -static void jpeg_v4_0_3_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val) +void jpeg_v4_0_3_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val) { uint32_t reg_offset = (reg << 2); @@ -875,7 +875,7 @@ static void jpeg_v4_0_3_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t re amdgpu_ring_write(ring, val); } -static void jpeg_v4_0_3_dec_ring_nop(struct amdgpu_ring *ring, uint32_t count) +void jpeg_v4_0_3_dec_ring_nop(struct amdgpu_ring *ring, uint32_t count) { int i; diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h index 22483dc663..747a3e5f68 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h @@ -48,4 +48,19 @@ extern const struct amdgpu_ip_block_version jpeg_v4_0_3_ip_block; +void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_job *job, + struct amdgpu_ib *ib, + uint32_t flags); +void jpeg_v4_0_3_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, + unsigned int flags); +void jpeg_v4_0_3_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned int vmid, uint64_t pd_addr); +void jpeg_v4_0_3_dec_ring_nop(struct amdgpu_ring *ring, uint32_t count); +void jpeg_v4_0_3_dec_ring_insert_start(struct amdgpu_ring *ring); +void jpeg_v4_0_3_dec_ring_insert_end(struct amdgpu_ring *ring); +void jpeg_v4_0_3_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); +void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask); + #endif /* __JPEG_V4_0_3_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c index 78b74daf4e..edf5bcdd2b 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c @@ -34,7 +34,17 @@ #include "vcn/vcn_4_0_5_sh_mask.h" #include "ivsrcid/vcn/irqsrcs_vcn_4_0.h" -#define regUVD_JPEG_PITCH_INTERNAL_OFFSET 0x401f +#define mmUVD_DPG_LMA_CTL regUVD_DPG_LMA_CTL +#define mmUVD_DPG_LMA_CTL_BASE_IDX regUVD_DPG_LMA_CTL_BASE_IDX +#define mmUVD_DPG_LMA_DATA regUVD_DPG_LMA_DATA +#define mmUVD_DPG_LMA_DATA_BASE_IDX regUVD_DPG_LMA_DATA_BASE_IDX + +#define regUVD_JPEG_PITCH_INTERNAL_OFFSET 0x401f +#define regJPEG_DEC_GFX10_ADDR_CONFIG_INTERNAL_OFFSET 0x4026 +#define regJPEG_SYS_INT_EN_INTERNAL_OFFSET 0x4141 +#define regJPEG_CGC_CTRL_INTERNAL_OFFSET 0x4161 +#define regJPEG_CGC_GATE_INTERNAL_OFFSET 0x4160 +#define regUVD_NO_OP_INTERNAL_OFFSET 0x0029 static void jpeg_v4_0_5_set_dec_ring_funcs(struct amdgpu_device *adev); static void jpeg_v4_0_5_set_irq_funcs(struct amdgpu_device *adev); @@ -43,6 +53,11 @@ static int jpeg_v4_0_5_set_powergating_state(void *handle, static void jpeg_v4_0_5_dec_ring_set_wptr(struct amdgpu_ring *ring); +static int amdgpu_ih_clientid_jpeg[] = { + SOC15_IH_CLIENTID_VCN, + SOC15_IH_CLIENTID_VCN1 +}; + /** * jpeg_v4_0_5_early_init - set function pointers * @@ -54,8 +69,20 @@ static int jpeg_v4_0_5_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + switch (amdgpu_ip_version(adev, UVD_HWIP, 0)) { + case IP_VERSION(4, 0, 5): + adev->jpeg.num_jpeg_inst = 1; + break; + case IP_VERSION(4, 0, 6): + adev->jpeg.num_jpeg_inst = 2; + break; + default: + DRM_DEV_ERROR(adev->dev, + "Failed to init vcn ip block(UVD_HWIP:0x%x)\n", + amdgpu_ip_version(adev, UVD_HWIP, 0)); + return -EINVAL; + } - adev->jpeg.num_jpeg_inst = 1; adev->jpeg.num_jpeg_rings = 1; jpeg_v4_0_5_set_dec_ring_funcs(adev); @@ -75,25 +102,30 @@ static int jpeg_v4_0_5_sw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_ring *ring; - int r; - - /* JPEG TRAP */ - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, - VCN_4_0__SRCID__JPEG_DECODE, &adev->jpeg.inst->irq); - if (r) - return r; - - /* JPEG DJPEG POISON EVENT */ - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, - VCN_4_0__SRCID_DJPEG0_POISON, &adev->jpeg.inst->irq); - if (r) - return r; - - /* JPEG EJPEG POISON EVENT */ - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, - VCN_4_0__SRCID_EJPEG0_POISON, &adev->jpeg.inst->irq); - if (r) - return r; + int r, i; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + + /* JPEG TRAP */ + r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_jpeg[i], + VCN_4_0__SRCID__JPEG_DECODE, &adev->jpeg.inst[i].irq); + if (r) + return r; + + /* JPEG DJPEG POISON EVENT */ + r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_jpeg[i], + VCN_4_0__SRCID_DJPEG0_POISON, &adev->jpeg.inst[i].irq); + if (r) + return r; + + /* JPEG EJPEG POISON EVENT */ + r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_jpeg[i], + VCN_4_0__SRCID_EJPEG0_POISON, &adev->jpeg.inst[i].irq); + if (r) + return r; + } r = amdgpu_jpeg_sw_init(adev); if (r) @@ -103,21 +135,23 @@ static int jpeg_v4_0_5_sw_init(void *handle) if (r) return r; - ring = adev->jpeg.inst->ring_dec; - ring->use_doorbell = true; - ring->doorbell_index = amdgpu_sriov_vf(adev) ? - (((adev->doorbell_index.vcn.vcn_ring0_1) << 1) + 4) : - ((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1); - ring->vm_hub = AMDGPU_MMHUB0(0); - - sprintf(ring->name, "jpeg_dec"); - r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0, - AMDGPU_RING_PRIO_DEFAULT, NULL); - if (r) - return r; - - adev->jpeg.internal.jpeg_pitch[0] = regUVD_JPEG_PITCH_INTERNAL_OFFSET; - adev->jpeg.inst->external.jpeg_pitch[0] = SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_PITCH); + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + + ring = adev->jpeg.inst[i].ring_dec; + ring->use_doorbell = true; + ring->vm_hub = AMDGPU_MMHUB0(0); + ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1 + 8 * i; + sprintf(ring->name, "jpeg_dec_%d", i); + r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst[i].irq, + 0, AMDGPU_RING_PRIO_DEFAULT, NULL); + if (r) + return r; + + adev->jpeg.internal.jpeg_pitch[0] = regUVD_JPEG_PITCH_INTERNAL_OFFSET; + adev->jpeg.inst[i].external.jpeg_pitch[0] = SOC15_REG_OFFSET(JPEG, i, regUVD_JPEG_PITCH); + } return 0; } @@ -152,14 +186,27 @@ static int jpeg_v4_0_5_sw_fini(void *handle) static int jpeg_v4_0_5_hw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec; - int r; + struct amdgpu_ring *ring; + int r, i; - r = amdgpu_ring_test_helper(ring); - if (r) - return r; + // TODO: Enable ring test with DPG support + if (adev->pg_flags & AMD_PG_SUPPORT_JPEG_DPG) { + DRM_DEV_INFO(adev->dev, "JPEG decode initialized successfully under DPG Mode"); + return 0; + } + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; - DRM_DEV_INFO(adev->dev, "JPEG decode initialized successfully.\n"); + ring = adev->jpeg.inst[i].ring_dec; + r = amdgpu_ring_test_helper(ring); + if (r) + return r; + } + + if (!r) + DRM_INFO("JPEG decode initialized successfully under SPG Mode\n"); return 0; } @@ -174,14 +221,20 @@ static int jpeg_v4_0_5_hw_init(void *handle) static int jpeg_v4_0_5_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i; cancel_delayed_work_sync(&adev->vcn.idle_work); - if (!amdgpu_sriov_vf(adev)) { - if (adev->jpeg.cur_state != AMD_PG_STATE_GATE && - RREG32_SOC15(JPEG, 0, regUVD_JRBC_STATUS)) - jpeg_v4_0_5_set_powergating_state(adev, AMD_PG_STATE_GATE); - } + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + + if (!amdgpu_sriov_vf(adev)) { + if (adev->jpeg.cur_state != AMD_PG_STATE_GATE && + RREG32_SOC15(JPEG, i, regUVD_JRBC_STATUS)) + jpeg_v4_0_5_set_powergating_state(adev, AMD_PG_STATE_GATE); + } + } return 0; } @@ -227,11 +280,11 @@ static int jpeg_v4_0_5_resume(void *handle) return r; } -static void jpeg_v4_0_5_disable_clock_gating(struct amdgpu_device *adev) +static void jpeg_v4_0_5_disable_clock_gating(struct amdgpu_device *adev, int inst) { uint32_t data = 0; - data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL); + data = RREG32_SOC15(JPEG, inst, regJPEG_CGC_CTRL); if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) { data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; data &= (~JPEG_CGC_CTRL__JPEG_DEC_MODE_MASK); @@ -241,21 +294,21 @@ static void jpeg_v4_0_5_disable_clock_gating(struct amdgpu_device *adev) data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT; - WREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL, data); + WREG32_SOC15(JPEG, inst, regJPEG_CGC_CTRL, data); - data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE); + data = RREG32_SOC15(JPEG, inst, regJPEG_CGC_GATE); data &= ~(JPEG_CGC_GATE__JPEG_DEC_MASK | JPEG_CGC_GATE__JPEG2_DEC_MASK | JPEG_CGC_GATE__JMCIF_MASK | JPEG_CGC_GATE__JRBBM_MASK); - WREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE, data); + WREG32_SOC15(JPEG, inst, regJPEG_CGC_GATE, data); } -static void jpeg_v4_0_5_enable_clock_gating(struct amdgpu_device *adev) +static void jpeg_v4_0_5_enable_clock_gating(struct amdgpu_device *adev, int inst) { uint32_t data = 0; - data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL); + data = RREG32_SOC15(JPEG, inst, regJPEG_CGC_CTRL); if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) { data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; data |= JPEG_CGC_CTRL__JPEG_DEC_MODE_MASK; @@ -265,47 +318,66 @@ static void jpeg_v4_0_5_enable_clock_gating(struct amdgpu_device *adev) data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT; - WREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL, data); + WREG32_SOC15(JPEG, inst, regJPEG_CGC_CTRL, data); - data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE); + data = RREG32_SOC15(JPEG, inst, regJPEG_CGC_GATE); data |= (JPEG_CGC_GATE__JPEG_DEC_MASK |JPEG_CGC_GATE__JPEG2_DEC_MASK |JPEG_CGC_GATE__JMCIF_MASK |JPEG_CGC_GATE__JRBBM_MASK); - WREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE, data); + WREG32_SOC15(JPEG, inst, regJPEG_CGC_GATE, data); } -static int jpeg_v4_0_5_disable_static_power_gating(struct amdgpu_device *adev) +static void jpeg_engine_4_0_5_dpg_clock_gating_mode(struct amdgpu_device *adev, + int inst_idx, uint8_t indirect) +{ + uint32_t data = 0; + + if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) + data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + else + data |= 0 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + + data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + WREG32_SOC15_JPEG_DPG_MODE(inst_idx, regJPEG_CGC_CTRL_INTERNAL_OFFSET, data, indirect); + + data = 0; + WREG32_SOC15_JPEG_DPG_MODE(inst_idx, regJPEG_CGC_GATE_INTERNAL_OFFSET, + data, indirect); +} + +static int jpeg_v4_0_5_disable_static_power_gating(struct amdgpu_device *adev, int inst) { if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) { - WREG32(SOC15_REG_OFFSET(JPEG, 0, regUVD_IPX_DLDO_CONFIG), + WREG32(SOC15_REG_OFFSET(JPEG, inst, regUVD_IPX_DLDO_CONFIG), 1 << UVD_IPX_DLDO_CONFIG__ONO1_PWR_CONFIG__SHIFT); - SOC15_WAIT_ON_RREG(JPEG, 0, regUVD_IPX_DLDO_STATUS, + SOC15_WAIT_ON_RREG(JPEG, inst, regUVD_IPX_DLDO_STATUS, 0, UVD_IPX_DLDO_STATUS__ONO1_PWR_STATUS_MASK); } /* disable anti hang mechanism */ - WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_POWER_STATUS), 0, + WREG32_P(SOC15_REG_OFFSET(JPEG, inst, regUVD_JPEG_POWER_STATUS), 0, ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); /* keep the JPEG in static PG mode */ - WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_POWER_STATUS), 0, + WREG32_P(SOC15_REG_OFFSET(JPEG, inst, regUVD_JPEG_POWER_STATUS), 0, ~UVD_JPEG_POWER_STATUS__JPEG_PG_MODE_MASK); return 0; } -static int jpeg_v4_0_5_enable_static_power_gating(struct amdgpu_device *adev) +static int jpeg_v4_0_5_enable_static_power_gating(struct amdgpu_device *adev, int inst) { /* enable anti hang mechanism */ - WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_POWER_STATUS), + WREG32_P(SOC15_REG_OFFSET(JPEG, inst, regUVD_JPEG_POWER_STATUS), UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK, ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) { - WREG32(SOC15_REG_OFFSET(JPEG, 0, regUVD_IPX_DLDO_CONFIG), + WREG32(SOC15_REG_OFFSET(JPEG, inst, regUVD_IPX_DLDO_CONFIG), 2 << UVD_IPX_DLDO_CONFIG__ONO1_PWR_CONFIG__SHIFT); - SOC15_WAIT_ON_RREG(JPEG, 0, regUVD_IPX_DLDO_STATUS, + SOC15_WAIT_ON_RREG(JPEG, inst, regUVD_IPX_DLDO_STATUS, 1 << UVD_IPX_DLDO_STATUS__ONO1_PWR_STATUS__SHIFT, UVD_IPX_DLDO_STATUS__ONO1_PWR_STATUS_MASK); } @@ -314,61 +386,153 @@ static int jpeg_v4_0_5_enable_static_power_gating(struct amdgpu_device *adev) } /** - * jpeg_v4_0_5_start - start JPEG block + * jpeg_v4_0_5_start_dpg_mode - Jpeg start with dpg mode * * @adev: amdgpu_device pointer + * @inst_idx: instance number index + * @indirect: indirectly write sram * - * Setup and start the JPEG block + * Start JPEG block with dpg mode */ -static int jpeg_v4_0_5_start(struct amdgpu_device *adev) +static void jpeg_v4_0_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) { - struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec; - int r; + struct amdgpu_ring *ring = adev->jpeg.inst[inst_idx].ring_dec; + uint32_t reg_data = 0; - if (adev->pm.dpm_enabled) - amdgpu_dpm_enable_jpeg(adev, true); + /* enable anti hang mechanism */ + reg_data = RREG32_SOC15(JPEG, inst_idx, regUVD_JPEG_POWER_STATUS); + reg_data &= ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK; + reg_data |= 0x1; + WREG32_SOC15(JPEG, inst_idx, regUVD_JPEG_POWER_STATUS, reg_data); - /* doorbell programming is done for every playback */ - adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, - (adev->doorbell_index.vcn.vcn_ring0_1 << 1), 0); + if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) { + WREG32(SOC15_REG_OFFSET(JPEG, inst_idx, regUVD_IPX_DLDO_CONFIG), + 2 << UVD_IPX_DLDO_CONFIG__ONO1_PWR_CONFIG__SHIFT); + SOC15_WAIT_ON_RREG(JPEG, inst_idx, regUVD_IPX_DLDO_STATUS, + 1 << UVD_IPX_DLDO_STATUS__ONO1_PWR_STATUS__SHIFT, + UVD_IPX_DLDO_STATUS__ONO1_PWR_STATUS_MASK); + } - WREG32_SOC15(VCN, 0, regVCN_JPEG_DB_CTRL, - ring->doorbell_index << VCN_JPEG_DB_CTRL__OFFSET__SHIFT | - VCN_JPEG_DB_CTRL__EN_MASK); + reg_data = RREG32_SOC15(JPEG, inst_idx, regUVD_JPEG_POWER_STATUS); + reg_data |= UVD_JPEG_POWER_STATUS__JPEG_PG_MODE_MASK; + WREG32_SOC15(JPEG, inst_idx, regUVD_JPEG_POWER_STATUS, reg_data); - /* disable power gating */ - r = jpeg_v4_0_5_disable_static_power_gating(adev); - if (r) - return r; + if (indirect) + adev->jpeg.inst[inst_idx].dpg_sram_curr_addr = + (uint32_t *)adev->jpeg.inst[inst_idx].dpg_sram_cpu_addr; - /* JPEG disable CGC */ - jpeg_v4_0_5_disable_clock_gating(adev); + jpeg_engine_4_0_5_dpg_clock_gating_mode(adev, inst_idx, indirect); /* MJPEG global tiling registers */ - WREG32_SOC15(JPEG, 0, regJPEG_DEC_GFX10_ADDR_CONFIG, - adev->gfx.config.gb_addr_config); - + WREG32_SOC15_JPEG_DPG_MODE(inst_idx, regJPEG_DEC_GFX10_ADDR_CONFIG_INTERNAL_OFFSET, + adev->gfx.config.gb_addr_config, indirect); + /* enable System Interrupt for JRBC */ + WREG32_SOC15_JPEG_DPG_MODE(inst_idx, regJPEG_SYS_INT_EN_INTERNAL_OFFSET, + JPEG_SYS_INT_EN__DJRBC_MASK, indirect); - /* enable JMI channel */ - WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI_CNTL), 0, - ~UVD_JMI_CNTL__SOFT_RESET_MASK); + /* add nop to workaround PSP size check */ + WREG32_SOC15_JPEG_DPG_MODE(inst_idx, regUVD_NO_OP_INTERNAL_OFFSET, 0, indirect); - /* enable System Interrupt for JRBC */ - WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regJPEG_SYS_INT_EN), - JPEG_SYS_INT_EN__DJRBC_MASK, - ~JPEG_SYS_INT_EN__DJRBC_MASK); + if (indirect) + amdgpu_jpeg_psp_update_sram(adev, inst_idx, 0); - WREG32_SOC15(JPEG, 0, regUVD_LMI_JRBC_RB_VMID, 0); - WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L)); - WREG32_SOC15(JPEG, 0, regUVD_LMI_JRBC_RB_64BIT_BAR_LOW, + WREG32_SOC15(JPEG, inst_idx, regUVD_LMI_JRBC_RB_VMID, 0); + WREG32_SOC15(JPEG, inst_idx, regUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L)); + WREG32_SOC15(JPEG, inst_idx, regUVD_LMI_JRBC_RB_64BIT_BAR_LOW, lower_32_bits(ring->gpu_addr)); - WREG32_SOC15(JPEG, 0, regUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, + WREG32_SOC15(JPEG, inst_idx, regUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, upper_32_bits(ring->gpu_addr)); - WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_RPTR, 0); - WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_WPTR, 0); - WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_CNTL, 0x00000002L); - WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_SIZE, ring->ring_size / 4); - ring->wptr = RREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_WPTR); + WREG32_SOC15(JPEG, inst_idx, regUVD_JRBC_RB_RPTR, 0); + WREG32_SOC15(JPEG, inst_idx, regUVD_JRBC_RB_WPTR, 0); + WREG32_SOC15(JPEG, inst_idx, regUVD_JRBC_RB_CNTL, 0x00000002L); + WREG32_SOC15(JPEG, inst_idx, regUVD_JRBC_RB_SIZE, ring->ring_size / 4); + ring->wptr = RREG32_SOC15(JPEG, inst_idx, regUVD_JRBC_RB_WPTR); +} + +/** + * jpeg_v4_0_5_stop_dpg_mode - Jpeg stop with dpg mode + * + * @adev: amdgpu_device pointer + * @inst_idx: instance number index + * + * Stop JPEG block with dpg mode + */ +static void jpeg_v4_0_5_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) +{ + uint32_t reg_data = 0; + + reg_data = RREG32_SOC15(JPEG, inst_idx, regUVD_JPEG_POWER_STATUS); + reg_data &= ~UVD_JPEG_POWER_STATUS__JPEG_PG_MODE_MASK; + WREG32_SOC15(JPEG, inst_idx, regUVD_JPEG_POWER_STATUS, reg_data); + +} + +/** + * jpeg_v4_0_5_start - start JPEG block + * + * @adev: amdgpu_device pointer + * + * Setup and start the JPEG block + */ +static int jpeg_v4_0_5_start(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring; + int r, i; + + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_jpeg(adev, true); + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + + ring = adev->jpeg.inst[i].ring_dec; + /* doorbell programming is done for every playback */ + adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i, i); + + WREG32_SOC15(VCN, i, regVCN_JPEG_DB_CTRL, + ring->doorbell_index << VCN_JPEG_DB_CTRL__OFFSET__SHIFT | + VCN_JPEG_DB_CTRL__EN_MASK); + + if (adev->pg_flags & AMD_PG_SUPPORT_JPEG_DPG) { + jpeg_v4_0_5_start_dpg_mode(adev, i, adev->jpeg.indirect_sram); + continue; + } + + /* disable power gating */ + r = jpeg_v4_0_5_disable_static_power_gating(adev, i); + if (r) + return r; + + /* JPEG disable CGC */ + jpeg_v4_0_5_disable_clock_gating(adev, i); + + /* MJPEG global tiling registers */ + WREG32_SOC15(JPEG, i, regJPEG_DEC_GFX10_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + + /* enable JMI channel */ + WREG32_P(SOC15_REG_OFFSET(JPEG, i, regUVD_JMI_CNTL), 0, + ~UVD_JMI_CNTL__SOFT_RESET_MASK); + + /* enable System Interrupt for JRBC */ + WREG32_P(SOC15_REG_OFFSET(JPEG, i, regJPEG_SYS_INT_EN), + JPEG_SYS_INT_EN__DJRBC_MASK, + ~JPEG_SYS_INT_EN__DJRBC_MASK); + + WREG32_SOC15(JPEG, i, regUVD_LMI_JRBC_RB_VMID, 0); + WREG32_SOC15(JPEG, i, regUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L)); + WREG32_SOC15(JPEG, i, regUVD_LMI_JRBC_RB_64BIT_BAR_LOW, + lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(JPEG, i, regUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, + upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(JPEG, i, regUVD_JRBC_RB_RPTR, 0); + WREG32_SOC15(JPEG, i, regUVD_JRBC_RB_WPTR, 0); + WREG32_SOC15(JPEG, i, regUVD_JRBC_RB_CNTL, 0x00000002L); + WREG32_SOC15(JPEG, i, regUVD_JRBC_RB_SIZE, ring->ring_size / 4); + ring->wptr = RREG32_SOC15(JPEG, i, regUVD_JRBC_RB_WPTR); + } return 0; } @@ -382,20 +546,29 @@ static int jpeg_v4_0_5_start(struct amdgpu_device *adev) */ static int jpeg_v4_0_5_stop(struct amdgpu_device *adev) { - int r; + int r, i; - /* reset JMI */ - WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI_CNTL), - UVD_JMI_CNTL__SOFT_RESET_MASK, - ~UVD_JMI_CNTL__SOFT_RESET_MASK); + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; - jpeg_v4_0_5_enable_clock_gating(adev); + if (adev->pg_flags & AMD_PG_SUPPORT_JPEG_DPG) { + jpeg_v4_0_5_stop_dpg_mode(adev, i); + continue; + } - /* enable power gating */ - r = jpeg_v4_0_5_enable_static_power_gating(adev); - if (r) - return r; + /* reset JMI */ + WREG32_P(SOC15_REG_OFFSET(JPEG, i, regUVD_JMI_CNTL), + UVD_JMI_CNTL__SOFT_RESET_MASK, + ~UVD_JMI_CNTL__SOFT_RESET_MASK); + + jpeg_v4_0_5_enable_clock_gating(adev, i); + /* enable power gating */ + r = jpeg_v4_0_5_enable_static_power_gating(adev, i); + if (r) + return r; + } if (adev->pm.dpm_enabled) amdgpu_dpm_enable_jpeg(adev, false); @@ -413,7 +586,7 @@ static uint64_t jpeg_v4_0_5_dec_ring_get_rptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - return RREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_RPTR); + return RREG32_SOC15(JPEG, ring->me, regUVD_JRBC_RB_RPTR); } /** @@ -430,7 +603,7 @@ static uint64_t jpeg_v4_0_5_dec_ring_get_wptr(struct amdgpu_ring *ring) if (ring->use_doorbell) return *ring->wptr_cpu_addr; else - return RREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_WPTR); + return RREG32_SOC15(JPEG, ring->me, regUVD_JRBC_RB_WPTR); } /** @@ -448,29 +621,41 @@ static void jpeg_v4_0_5_dec_ring_set_wptr(struct amdgpu_ring *ring) *ring->wptr_cpu_addr = lower_32_bits(ring->wptr); WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); } else { - WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(JPEG, ring->me, regUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr)); } } static bool jpeg_v4_0_5_is_idle(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int ret = 1; + int i, ret = 1; - ret &= (((RREG32_SOC15(JPEG, 0, regUVD_JRBC_STATUS) & - UVD_JRBC_STATUS__RB_JOB_DONE_MASK) == - UVD_JRBC_STATUS__RB_JOB_DONE_MASK)); + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + ret &= (((RREG32_SOC15(JPEG, i, regUVD_JRBC_STATUS) & + UVD_JRBC_STATUS__RB_JOB_DONE_MASK) == + UVD_JRBC_STATUS__RB_JOB_DONE_MASK)); + } return ret; } static int jpeg_v4_0_5_wait_for_idle(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + + return SOC15_WAIT_ON_RREG(JPEG, i, regUVD_JRBC_STATUS, + UVD_JRBC_STATUS__RB_JOB_DONE_MASK, + UVD_JRBC_STATUS__RB_JOB_DONE_MASK); + } - return SOC15_WAIT_ON_RREG(JPEG, 0, regUVD_JRBC_STATUS, - UVD_JRBC_STATUS__RB_JOB_DONE_MASK, - UVD_JRBC_STATUS__RB_JOB_DONE_MASK); + return 0; } static int jpeg_v4_0_5_set_clockgating_state(void *handle, @@ -478,13 +663,20 @@ static int jpeg_v4_0_5_set_clockgating_state(void *handle, { struct amdgpu_device *adev = (struct amdgpu_device *)handle; bool enable = (state == AMD_CG_STATE_GATE) ? true : false; + int i; - if (enable) { - if (!jpeg_v4_0_5_is_idle(handle)) - return -EBUSY; - jpeg_v4_0_5_enable_clock_gating(adev); - } else { - jpeg_v4_0_5_disable_clock_gating(adev); + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + + if (enable) { + if (!jpeg_v4_0_5_is_idle(handle)) + return -EBUSY; + + jpeg_v4_0_5_enable_clock_gating(adev, i); + } else { + jpeg_v4_0_5_disable_clock_gating(adev, i); + } } return 0; @@ -519,11 +711,25 @@ static int jpeg_v4_0_5_process_interrupt(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { + uint32_t ip_instance; + DRM_DEBUG("IH: JPEG TRAP\n"); + switch (entry->client_id) { + case SOC15_IH_CLIENTID_VCN: + ip_instance = 0; + break; + case SOC15_IH_CLIENTID_VCN1: + ip_instance = 1; + break; + default: + DRM_ERROR("Unhandled client id: %d\n", entry->client_id); + return 0; + } + switch (entry->src_id) { case VCN_4_0__SRCID__JPEG_DECODE: - amdgpu_fence_process(adev->jpeg.inst->ring_dec); + amdgpu_fence_process(adev->jpeg.inst[ip_instance].ring_dec); break; case VCN_4_0__SRCID_DJPEG0_POISON: case VCN_4_0__SRCID_EJPEG0_POISON: @@ -589,8 +795,16 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_5_dec_ring_vm_funcs = { static void jpeg_v4_0_5_set_dec_ring_funcs(struct amdgpu_device *adev) { - adev->jpeg.inst->ring_dec->funcs = &jpeg_v4_0_5_dec_ring_vm_funcs; - DRM_DEV_INFO(adev->dev, "JPEG decode is enabled in VM mode\n"); + int i; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + + adev->jpeg.inst[i].ring_dec->funcs = &jpeg_v4_0_5_dec_ring_vm_funcs; + adev->jpeg.inst[i].ring_dec->me = i; + DRM_DEV_INFO(adev->dev, "JPEG%d decode is enabled in VM mode\n", i); + } } static const struct amdgpu_irq_src_funcs jpeg_v4_0_5_irq_funcs = { @@ -599,8 +813,15 @@ static const struct amdgpu_irq_src_funcs jpeg_v4_0_5_irq_funcs = { static void jpeg_v4_0_5_set_irq_funcs(struct amdgpu_device *adev) { - adev->jpeg.inst->irq.num_types = 1; - adev->jpeg.inst->irq.funcs = &jpeg_v4_0_5_irq_funcs; + int i; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + + adev->jpeg.inst[i].irq.num_types = 1; + adev->jpeg.inst[i].irq.funcs = &jpeg_v4_0_5_irq_funcs; + } } const struct amdgpu_ip_block_version jpeg_v4_0_5_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c new file mode 100644 index 0000000000..e70200f975 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c @@ -0,0 +1,570 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "amdgpu.h" +#include "amdgpu_jpeg.h" +#include "amdgpu_pm.h" +#include "soc15.h" +#include "soc15d.h" +#include "jpeg_v4_0_3.h" + +#include "vcn/vcn_5_0_0_offset.h" +#include "vcn/vcn_5_0_0_sh_mask.h" +#include "ivsrcid/vcn/irqsrcs_vcn_4_0.h" + +static void jpeg_v5_0_0_set_dec_ring_funcs(struct amdgpu_device *adev); +static void jpeg_v5_0_0_set_irq_funcs(struct amdgpu_device *adev); +static int jpeg_v5_0_0_set_powergating_state(void *handle, + enum amd_powergating_state state); + +/** + * jpeg_v5_0_0_early_init - set function pointers + * + * @handle: amdgpu_device pointer + * + * Set ring and irq function pointers + */ +static int jpeg_v5_0_0_early_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + adev->jpeg.num_jpeg_inst = 1; + adev->jpeg.num_jpeg_rings = 1; + + jpeg_v5_0_0_set_dec_ring_funcs(adev); + jpeg_v5_0_0_set_irq_funcs(adev); + + return 0; +} + +/** + * jpeg_v5_0_0_sw_init - sw init for JPEG block + * + * @handle: amdgpu_device pointer + * + * Load firmware and sw initialization + */ +static int jpeg_v5_0_0_sw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_ring *ring; + int r; + + /* JPEG TRAP */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, + VCN_4_0__SRCID__JPEG_DECODE, &adev->jpeg.inst->irq); + if (r) + return r; + + r = amdgpu_jpeg_sw_init(adev); + if (r) + return r; + + r = amdgpu_jpeg_resume(adev); + if (r) + return r; + + ring = adev->jpeg.inst->ring_dec; + ring->use_doorbell = true; + ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1; + ring->vm_hub = AMDGPU_MMHUB0(0); + + sprintf(ring->name, "jpeg_dec"); + r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0, + AMDGPU_RING_PRIO_DEFAULT, NULL); + if (r) + return r; + + adev->jpeg.internal.jpeg_pitch[0] = regUVD_JPEG_PITCH_INTERNAL_OFFSET; + adev->jpeg.inst->external.jpeg_pitch[0] = SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_PITCH); + + return 0; +} + +/** + * jpeg_v5_0_0_sw_fini - sw fini for JPEG block + * + * @handle: amdgpu_device pointer + * + * JPEG suspend and free up sw allocation + */ +static int jpeg_v5_0_0_sw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; + + r = amdgpu_jpeg_suspend(adev); + if (r) + return r; + + r = amdgpu_jpeg_sw_fini(adev); + + return r; +} + +/** + * jpeg_v5_0_0_hw_init - start and test JPEG block + * + * @handle: amdgpu_device pointer + * + */ +static int jpeg_v5_0_0_hw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec; + int r; + + adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, + (adev->doorbell_index.vcn.vcn_ring0_1 << 1), 0); + + WREG32_SOC15(VCN, 0, regVCN_JPEG_DB_CTRL, + ring->doorbell_index << VCN_JPEG_DB_CTRL__OFFSET__SHIFT | + VCN_JPEG_DB_CTRL__EN_MASK); + + r = amdgpu_ring_test_helper(ring); + if (r) + return r; + + DRM_DEV_INFO(adev->dev, "JPEG decode initialized successfully.\n"); + + return 0; +} + +/** + * jpeg_v5_0_0_hw_fini - stop the hardware block + * + * @handle: amdgpu_device pointer + * + * Stop the JPEG block, mark ring as not ready any more + */ +static int jpeg_v5_0_0_hw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + cancel_delayed_work_sync(&adev->vcn.idle_work); + + if (adev->jpeg.cur_state != AMD_PG_STATE_GATE && + RREG32_SOC15(JPEG, 0, regUVD_JRBC_STATUS)) + jpeg_v5_0_0_set_powergating_state(adev, AMD_PG_STATE_GATE); + + return 0; +} + +/** + * jpeg_v5_0_0_suspend - suspend JPEG block + * + * @handle: amdgpu_device pointer + * + * HW fini and suspend JPEG block + */ +static int jpeg_v5_0_0_suspend(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; + + r = jpeg_v5_0_0_hw_fini(adev); + if (r) + return r; + + r = amdgpu_jpeg_suspend(adev); + + return r; +} + +/** + * jpeg_v5_0_0_resume - resume JPEG block + * + * @handle: amdgpu_device pointer + * + * Resume firmware and hw init JPEG block + */ +static int jpeg_v5_0_0_resume(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; + + r = amdgpu_jpeg_resume(adev); + if (r) + return r; + + r = jpeg_v5_0_0_hw_init(adev); + + return r; +} + +static void jpeg_v5_0_0_disable_clock_gating(struct amdgpu_device *adev) +{ + uint32_t data = 0; + + WREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE, data); + + data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL); + data &= ~(JPEG_CGC_CTRL__JPEG0_DEC_MODE_MASK + | JPEG_CGC_CTRL__JPEG_ENC_MODE_MASK); + WREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL, data); +} + +static void jpeg_v5_0_0_enable_clock_gating(struct amdgpu_device *adev) +{ + uint32_t data = 0; + + data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL); + + data |= 1 << JPEG_CGC_CTRL__JPEG0_DEC_MODE__SHIFT; + WREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL, data); + + data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE); + data |= (JPEG_CGC_GATE__JPEG0_DEC_MASK + |JPEG_CGC_GATE__JPEG_ENC_MASK + |JPEG_CGC_GATE__JMCIF_MASK + |JPEG_CGC_GATE__JRBBM_MASK); + WREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE, data); +} + +static int jpeg_v5_0_0_disable_static_power_gating(struct amdgpu_device *adev) +{ + uint32_t data = 0; + + data = 1 << UVD_IPX_DLDO_CONFIG__ONO1_PWR_CONFIG__SHIFT; + WREG32_SOC15(JPEG, 0, regUVD_IPX_DLDO_CONFIG, data); + SOC15_WAIT_ON_RREG(JPEG, 0, regUVD_IPX_DLDO_STATUS, 0, + UVD_IPX_DLDO_STATUS__ONO1_PWR_STATUS_MASK); + + /* disable anti hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_POWER_STATUS), 0, + ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); + + /* keep the JPEG in static PG mode */ + WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_POWER_STATUS), 0, + ~UVD_JPEG_POWER_STATUS__JPEG_PG_MODE_MASK); + + return 0; +} + +static int jpeg_v5_0_0_enable_static_power_gating(struct amdgpu_device *adev) +{ + /* enable anti hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_POWER_STATUS), + UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK, + ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); + + if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) { + WREG32(SOC15_REG_OFFSET(JPEG, 0, regUVD_IPX_DLDO_CONFIG), + 2 << UVD_IPX_DLDO_CONFIG__ONO1_PWR_CONFIG__SHIFT); + SOC15_WAIT_ON_RREG(JPEG, 0, regUVD_IPX_DLDO_STATUS, + 1 << UVD_IPX_DLDO_STATUS__ONO1_PWR_STATUS__SHIFT, + UVD_IPX_DLDO_STATUS__ONO1_PWR_STATUS_MASK); + } + + return 0; +} + +/** + * jpeg_v5_0_0_start - start JPEG block + * + * @adev: amdgpu_device pointer + * + * Setup and start the JPEG block + */ +static int jpeg_v5_0_0_start(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec; + int r; + + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_jpeg(adev, true); + + /* disable power gating */ + r = jpeg_v5_0_0_disable_static_power_gating(adev); + if (r) + return r; + + /* JPEG disable CGC */ + jpeg_v5_0_0_disable_clock_gating(adev); + + /* MJPEG global tiling registers */ + WREG32_SOC15(JPEG, 0, regJPEG_DEC_GFX10_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + + + /* enable JMI channel */ + WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI_CNTL), 0, + ~UVD_JMI_CNTL__SOFT_RESET_MASK); + + /* enable System Interrupt for JRBC */ + WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regJPEG_SYS_INT_EN), + JPEG_SYS_INT_EN__DJRBC0_MASK, + ~JPEG_SYS_INT_EN__DJRBC0_MASK); + + WREG32_SOC15(JPEG, 0, regUVD_LMI_JRBC_RB_VMID, 0); + WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L)); + WREG32_SOC15(JPEG, 0, regUVD_LMI_JRBC_RB_64BIT_BAR_LOW, + lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(JPEG, 0, regUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, + upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_RPTR, 0); + WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_WPTR, 0); + WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_CNTL, 0x00000002L); + WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_SIZE, ring->ring_size / 4); + ring->wptr = RREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_WPTR); + + return 0; +} + +/** + * jpeg_v5_0_0_stop - stop JPEG block + * + * @adev: amdgpu_device pointer + * + * stop the JPEG block + */ +static int jpeg_v5_0_0_stop(struct amdgpu_device *adev) +{ + int r; + + /* reset JMI */ + WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI_CNTL), + UVD_JMI_CNTL__SOFT_RESET_MASK, + ~UVD_JMI_CNTL__SOFT_RESET_MASK); + + jpeg_v5_0_0_enable_clock_gating(adev); + + /* enable power gating */ + r = jpeg_v5_0_0_enable_static_power_gating(adev); + if (r) + return r; + + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_jpeg(adev, false); + + return 0; +} + +/** + * jpeg_v5_0_0_dec_ring_get_rptr - get read pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware read pointer + */ +static uint64_t jpeg_v5_0_0_dec_ring_get_rptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + return RREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_RPTR); +} + +/** + * jpeg_v5_0_0_dec_ring_get_wptr - get write pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware write pointer + */ +static uint64_t jpeg_v5_0_0_dec_ring_get_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) + return *ring->wptr_cpu_addr; + else + return RREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_WPTR); +} + +/** + * jpeg_v5_0_0_dec_ring_set_wptr - set write pointer + * + * @ring: amdgpu_ring pointer + * + * Commits the write pointer to the hardware + */ +static void jpeg_v5_0_0_dec_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) { + *ring->wptr_cpu_addr = lower_32_bits(ring->wptr); + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); + } else { + WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr)); + } +} + +static bool jpeg_v5_0_0_is_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int ret = 1; + + ret &= (((RREG32_SOC15(JPEG, 0, regUVD_JRBC_STATUS) & + UVD_JRBC_STATUS__RB_JOB_DONE_MASK) == + UVD_JRBC_STATUS__RB_JOB_DONE_MASK)); + + return ret; +} + +static int jpeg_v5_0_0_wait_for_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return SOC15_WAIT_ON_RREG(JPEG, 0, regUVD_JRBC_STATUS, + UVD_JRBC_STATUS__RB_JOB_DONE_MASK, + UVD_JRBC_STATUS__RB_JOB_DONE_MASK); +} + +static int jpeg_v5_0_0_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + bool enable = (state == AMD_CG_STATE_GATE) ? true : false; + + if (enable) { + if (!jpeg_v5_0_0_is_idle(handle)) + return -EBUSY; + jpeg_v5_0_0_enable_clock_gating(adev); + } else { + jpeg_v5_0_0_disable_clock_gating(adev); + } + + return 0; +} + +static int jpeg_v5_0_0_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int ret; + + if (state == adev->jpeg.cur_state) + return 0; + + if (state == AMD_PG_STATE_GATE) + ret = jpeg_v5_0_0_stop(adev); + else + ret = jpeg_v5_0_0_start(adev); + + if (!ret) + adev->jpeg.cur_state = state; + + return ret; +} + +static int jpeg_v5_0_0_set_interrupt_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned int type, + enum amdgpu_interrupt_state state) +{ + return 0; +} + +static int jpeg_v5_0_0_process_interrupt(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_DEBUG("IH: JPEG TRAP\n"); + + switch (entry->src_id) { + case VCN_4_0__SRCID__JPEG_DECODE: + amdgpu_fence_process(adev->jpeg.inst->ring_dec); + break; + default: + DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n", + entry->src_id, entry->src_data[0]); + break; + } + + return 0; +} + +static const struct amd_ip_funcs jpeg_v5_0_0_ip_funcs = { + .name = "jpeg_v5_0_0", + .early_init = jpeg_v5_0_0_early_init, + .late_init = NULL, + .sw_init = jpeg_v5_0_0_sw_init, + .sw_fini = jpeg_v5_0_0_sw_fini, + .hw_init = jpeg_v5_0_0_hw_init, + .hw_fini = jpeg_v5_0_0_hw_fini, + .suspend = jpeg_v5_0_0_suspend, + .resume = jpeg_v5_0_0_resume, + .is_idle = jpeg_v5_0_0_is_idle, + .wait_for_idle = jpeg_v5_0_0_wait_for_idle, + .check_soft_reset = NULL, + .pre_soft_reset = NULL, + .soft_reset = NULL, + .post_soft_reset = NULL, + .set_clockgating_state = jpeg_v5_0_0_set_clockgating_state, + .set_powergating_state = jpeg_v5_0_0_set_powergating_state, +}; + +static const struct amdgpu_ring_funcs jpeg_v5_0_0_dec_ring_vm_funcs = { + .type = AMDGPU_RING_TYPE_VCN_JPEG, + .align_mask = 0xf, + .get_rptr = jpeg_v5_0_0_dec_ring_get_rptr, + .get_wptr = jpeg_v5_0_0_dec_ring_get_wptr, + .set_wptr = jpeg_v5_0_0_dec_ring_set_wptr, + .emit_frame_size = + SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + + 8 + /* jpeg_v5_0_0_dec_ring_emit_vm_flush */ + 22 + 22 + /* jpeg_v5_0_0_dec_ring_emit_fence x2 vm fence */ + 8 + 16, + .emit_ib_size = 22, /* jpeg_v5_0_0_dec_ring_emit_ib */ + .emit_ib = jpeg_v4_0_3_dec_ring_emit_ib, + .emit_fence = jpeg_v4_0_3_dec_ring_emit_fence, + .emit_vm_flush = jpeg_v4_0_3_dec_ring_emit_vm_flush, + .test_ring = amdgpu_jpeg_dec_ring_test_ring, + .test_ib = amdgpu_jpeg_dec_ring_test_ib, + .insert_nop = jpeg_v4_0_3_dec_ring_nop, + .insert_start = jpeg_v4_0_3_dec_ring_insert_start, + .insert_end = jpeg_v4_0_3_dec_ring_insert_end, + .pad_ib = amdgpu_ring_generic_pad_ib, + .begin_use = amdgpu_jpeg_ring_begin_use, + .end_use = amdgpu_jpeg_ring_end_use, + .emit_wreg = jpeg_v4_0_3_dec_ring_emit_wreg, + .emit_reg_wait = jpeg_v4_0_3_dec_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, +}; + +static void jpeg_v5_0_0_set_dec_ring_funcs(struct amdgpu_device *adev) +{ + adev->jpeg.inst->ring_dec->funcs = &jpeg_v5_0_0_dec_ring_vm_funcs; + DRM_DEV_INFO(adev->dev, "JPEG decode is enabled in VM mode\n"); +} + +static const struct amdgpu_irq_src_funcs jpeg_v5_0_0_irq_funcs = { + .set = jpeg_v5_0_0_set_interrupt_state, + .process = jpeg_v5_0_0_process_interrupt, +}; + +static void jpeg_v5_0_0_set_irq_funcs(struct amdgpu_device *adev) +{ + adev->jpeg.inst->irq.num_types = 1; + adev->jpeg.inst->irq.funcs = &jpeg_v5_0_0_irq_funcs; +} + +const struct amdgpu_ip_block_version jpeg_v5_0_0_ip_block = { + .type = AMD_IP_BLOCK_TYPE_JPEG, + .major = 5, + .minor = 0, + .rev = 0, + .funcs = &jpeg_v5_0_0_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.h new file mode 100644 index 0000000000..bd348336b2 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.h @@ -0,0 +1,29 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __JPEG_V5_0_0_H__ +#define __JPEG_V5_0_0_H__ + +extern const struct amdgpu_ip_block_version jpeg_v5_0_0_ip_block; + +#endif /* __JPEG_V5_0_0_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/lsdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/lsdma_v7_0.c new file mode 100644 index 0000000000..396262044e --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/lsdma_v7_0.c @@ -0,0 +1,121 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include <linux/delay.h> +#include "amdgpu.h" +#include "lsdma_v7_0.h" +#include "amdgpu_lsdma.h" + +#include "lsdma/lsdma_7_0_0_offset.h" +#include "lsdma/lsdma_7_0_0_sh_mask.h" + +static int lsdma_v7_0_wait_pio_status(struct amdgpu_device *adev) +{ + return amdgpu_lsdma_wait_for(adev, SOC15_REG_OFFSET(LSDMA, 0, regLSDMA_PIO_STATUS), + LSDMA_PIO_STATUS__PIO_IDLE_MASK | LSDMA_PIO_STATUS__PIO_FIFO_EMPTY_MASK, + LSDMA_PIO_STATUS__PIO_IDLE_MASK | LSDMA_PIO_STATUS__PIO_FIFO_EMPTY_MASK); +} + +static int lsdma_v7_0_copy_mem(struct amdgpu_device *adev, + uint64_t src_addr, + uint64_t dst_addr, + uint64_t size) +{ + int ret; + uint32_t tmp; + + WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_SRC_ADDR_LO, lower_32_bits(src_addr)); + WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_SRC_ADDR_HI, upper_32_bits(src_addr)); + + WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_DST_ADDR_LO, lower_32_bits(dst_addr)); + WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_DST_ADDR_HI, upper_32_bits(dst_addr)); + + WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_CONTROL, 0x0); + + tmp = RREG32_SOC15(LSDMA, 0, regLSDMA_PIO_COMMAND); + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, BYTE_COUNT, size); + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, SRC_LOCATION, 0); + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, DST_LOCATION, 0); + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, SRC_ADDR_INC, 0); + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, DST_ADDR_INC, 0); + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, OVERLAP_DISABLE, 0); + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, CONSTANT_FILL, 0); + WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_COMMAND, tmp); + + ret = lsdma_v7_0_wait_pio_status(adev); + if (ret) + dev_err(adev->dev, "LSDMA PIO failed to copy memory!\n"); + + return ret; +} + +static int lsdma_v7_0_fill_mem(struct amdgpu_device *adev, + uint64_t dst_addr, + uint32_t data, + uint64_t size) +{ + int ret; + uint32_t tmp; + + WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_CONSTFILL_DATA, data); + + WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_DST_ADDR_LO, lower_32_bits(dst_addr)); + WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_DST_ADDR_HI, upper_32_bits(dst_addr)); + + WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_CONTROL, 0x0); + + tmp = RREG32_SOC15(LSDMA, 0, regLSDMA_PIO_COMMAND); + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, BYTE_COUNT, size); + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, SRC_LOCATION, 0); + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, DST_LOCATION, 0); + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, SRC_ADDR_INC, 0); + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, DST_ADDR_INC, 0); + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, OVERLAP_DISABLE, 0); + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, CONSTANT_FILL, 1); + WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_COMMAND, tmp); + + ret = lsdma_v7_0_wait_pio_status(adev); + if (ret) + dev_err(adev->dev, "LSDMA PIO failed to fill memory!\n"); + + return ret; +} + +static void lsdma_v7_0_update_memory_power_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t tmp; + + tmp = RREG32_SOC15(LSDMA, 0, regLSDMA_MEM_POWER_CTRL); + tmp = REG_SET_FIELD(tmp, LSDMA_MEM_POWER_CTRL, MEM_POWER_CTRL_EN, 0); + WREG32_SOC15(LSDMA, 0, regLSDMA_MEM_POWER_CTRL, tmp); + + tmp = REG_SET_FIELD(tmp, LSDMA_MEM_POWER_CTRL, MEM_POWER_CTRL_EN, enable); + WREG32_SOC15(LSDMA, 0, regLSDMA_MEM_POWER_CTRL, tmp); +} + +const struct amdgpu_lsdma_funcs lsdma_v7_0_funcs = { + .copy_mem = lsdma_v7_0_copy_mem, + .fill_mem = lsdma_v7_0_fill_mem, + .update_memory_power_gating = lsdma_v7_0_update_memory_power_gating +}; diff --git a/drivers/gpu/drm/amd/amdgpu/lsdma_v7_0.h b/drivers/gpu/drm/amd/amdgpu/lsdma_v7_0.h new file mode 100644 index 0000000000..52b4485cdd --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/lsdma_v7_0.h @@ -0,0 +1,31 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __LSDMA_V7_0_H__ +#define __LSDMA_V7_0_H__ + +#include "soc15_common.h" + +extern const struct amdgpu_lsdma_funcs lsdma_v7_0_funcs; + +#endif /* __LSDMA_V7_0_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 26d71a2239..63f281a998 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -49,6 +49,8 @@ MODULE_FIRMWARE("amdgpu/gc_11_0_4_mes_2.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_4_mes1.bin"); MODULE_FIRMWARE("amdgpu/gc_11_5_0_mes_2.bin"); MODULE_FIRMWARE("amdgpu/gc_11_5_0_mes1.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_1_mes_2.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_1_mes1.bin"); static int mes_v11_0_hw_fini(void *handle); @@ -56,6 +58,7 @@ static int mes_v11_0_kiq_hw_init(struct amdgpu_device *adev); static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev); #define MES_EOP_SIZE 2048 +#define GFX_MES_DRAM_SIZE 0x80000 static void mes_v11_0_ring_set_wptr(struct amdgpu_ring *ring) { @@ -408,8 +411,11 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes) mes_set_hw_res_pkt.enable_reg_active_poll = 1; mes_set_hw_res_pkt.enable_level_process_quantum_check = 1; mes_set_hw_res_pkt.oversubscription_timer = 50; - mes_set_hw_res_pkt.enable_mes_event_int_logging = 1; - mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = mes->event_log_gpu_addr; + if (amdgpu_mes_log_enable) { + mes_set_hw_res_pkt.enable_mes_event_int_logging = 1; + mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = + mes->event_log_gpu_addr; + } return mes_v11_0_submit_pkt_and_poll_completion(mes, &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt), @@ -475,7 +481,13 @@ static int mes_v11_0_allocate_ucode_data_buffer(struct amdgpu_device *adev, le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes)); fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes); - r = amdgpu_bo_create_reserved(adev, fw_size, + if (fw_size > GFX_MES_DRAM_SIZE) { + dev_err(adev->dev, "PIPE%d ucode data fw size (%d) is greater than dram size (%d)\n", + pipe, fw_size, GFX_MES_DRAM_SIZE); + return -EINVAL; + } + + r = amdgpu_bo_create_reserved(adev, GFX_MES_DRAM_SIZE, 64 * 1024, AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT, @@ -611,8 +623,8 @@ static int mes_v11_0_load_microcode(struct amdgpu_device *adev, WREG32_SOC15(GC, 0, regCP_MES_MDBASE_HI, upper_32_bits(adev->mes.data_fw_gpu_addr[pipe])); - /* Set 0x3FFFF (256K-1) to CP_MES_MDBOUND_LO */ - WREG32_SOC15(GC, 0, regCP_MES_MDBOUND_LO, 0x3FFFF); + /* Set 0x7FFFF (512K-1) to CP_MES_MDBOUND_LO */ + WREG32_SOC15(GC, 0, regCP_MES_MDBOUND_LO, 0x7FFFF); if (prime_icache) { /* invalidate ICACHE */ diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c index fb53aacdcb..c0fc44cdd6 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c @@ -33,6 +33,7 @@ #define regVM_L2_CNTL3_DEFAULT 0x80100007 #define regVM_L2_CNTL4_DEFAULT 0x000000c1 +#define mmSMNAID_AID0_MCA_SMU 0x03b30400 static u64 mmhub_v1_8_get_fb_location(struct amdgpu_device *adev) { @@ -705,8 +706,94 @@ static const struct amdgpu_ras_block_hw_ops mmhub_v1_8_ras_hw_ops = { .reset_ras_error_count = mmhub_v1_8_reset_ras_error_count, }; +static int mmhub_v1_8_aca_bank_generate_report(struct aca_handle *handle, + struct aca_bank *bank, enum aca_error_type type, + struct aca_bank_report *report, void *data) +{ + u64 status, misc0; + int ret; + + status = bank->regs[ACA_REG_IDX_STATUS]; + if ((type == ACA_ERROR_TYPE_UE && + ACA_REG__STATUS__ERRORCODEEXT(status) == ACA_EXTERROR_CODE_FAULT) || + (type == ACA_ERROR_TYPE_CE && + ACA_REG__STATUS__ERRORCODEEXT(status) == ACA_EXTERROR_CODE_CE)) { + + ret = aca_bank_info_decode(bank, &report->info); + if (ret) + return ret; + + misc0 = bank->regs[ACA_REG_IDX_MISC0]; + report->count[type] = ACA_REG__MISC0__ERRCNT(misc0); + } + + return 0; +} + +/* reference to smu driver if header file */ +static int mmhub_v1_8_err_codes[] = { + 0, 1, 2, 3, 4, /* CODE_DAGB0 - 4 */ + 5, 6, 7, 8, 9, /* CODE_EA0 - 4 */ + 10, /* CODE_UTCL2_ROUTER */ + 11, /* CODE_VML2 */ + 12, /* CODE_VML2_WALKER */ + 13, /* CODE_MMCANE */ +}; + +static bool mmhub_v1_8_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, + enum aca_error_type type, void *data) +{ + u32 instlo; + + instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]); + instlo &= GENMASK(31, 1); + + if (instlo != mmSMNAID_AID0_MCA_SMU) + return false; + + if (aca_bank_check_error_codes(handle->adev, bank, + mmhub_v1_8_err_codes, + ARRAY_SIZE(mmhub_v1_8_err_codes))) + return false; + + return true; +} + +static const struct aca_bank_ops mmhub_v1_8_aca_bank_ops = { + .aca_bank_generate_report = mmhub_v1_8_aca_bank_generate_report, + .aca_bank_is_valid = mmhub_v1_8_aca_bank_is_valid, +}; + +static const struct aca_info mmhub_v1_8_aca_info = { + .hwip = ACA_HWIP_TYPE_SMU, + .mask = ACA_ERROR_UE_MASK, + .bank_ops = &mmhub_v1_8_aca_bank_ops, +}; + +static int mmhub_v1_8_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) +{ + int r; + + r = amdgpu_ras_block_late_init(adev, ras_block); + if (r) + return r; + + r = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__MMHUB, + &mmhub_v1_8_aca_info, NULL); + if (r) + goto late_fini; + + return 0; + +late_fini: + amdgpu_ras_block_late_fini(adev, ras_block); + + return r; +} + struct amdgpu_mmhub_ras mmhub_v1_8_ras = { .ras_block = { .hw_ops = &mmhub_v1_8_ras_hw_ops, + .ras_late_init = mmhub_v1_8_ras_late_init, }, }; diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index 63725b2ebc..a2bd2c3b1e 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c @@ -404,7 +404,8 @@ static int xgpu_ai_request_init_data(struct amdgpu_device *adev) return xgpu_ai_send_access_requests(adev, IDH_REQ_GPU_INIT_DATA); } -static void xgpu_ai_ras_poison_handler(struct amdgpu_device *adev) +static void xgpu_ai_ras_poison_handler(struct amdgpu_device *adev, + enum amdgpu_ras_block block) { xgpu_ai_send_access_requests(adev, IDH_RAS_POISON); } diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c index 6a68ee946f..77f5b55dec 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c @@ -152,14 +152,14 @@ static void xgpu_nv_mailbox_trans_msg (struct amdgpu_device *adev, xgpu_nv_mailbox_set_valid(adev, false); } -static int xgpu_nv_send_access_requests(struct amdgpu_device *adev, - enum idh_request req) +static int xgpu_nv_send_access_requests_with_param(struct amdgpu_device *adev, + enum idh_request req, u32 data1, u32 data2, u32 data3) { int r, retry = 1; enum idh_event event = -1; send_request: - xgpu_nv_mailbox_trans_msg(adev, req, 0, 0, 0); + xgpu_nv_mailbox_trans_msg(adev, req, data1, data2, data3); switch (req) { case IDH_REQ_GPU_INIT_ACCESS: @@ -170,6 +170,10 @@ send_request: case IDH_REQ_GPU_INIT_DATA: event = IDH_REQ_GPU_INIT_DATA_READY; break; + case IDH_RAS_POISON: + if (data1 != 0) + event = IDH_RAS_POISON_READY; + break; default: break; } @@ -206,6 +210,13 @@ send_request: return 0; } +static int xgpu_nv_send_access_requests(struct amdgpu_device *adev, + enum idh_request req) +{ + return xgpu_nv_send_access_requests_with_param(adev, + req, 0, 0, 0); +} + static int xgpu_nv_request_reset(struct amdgpu_device *adev) { int ret, i = 0; @@ -424,9 +435,17 @@ void xgpu_nv_mailbox_put_irq(struct amdgpu_device *adev) amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0); } -static void xgpu_nv_ras_poison_handler(struct amdgpu_device *adev) +static void xgpu_nv_ras_poison_handler(struct amdgpu_device *adev, + enum amdgpu_ras_block block) { - xgpu_nv_send_access_requests(adev, IDH_RAS_POISON); + if (amdgpu_ip_version(adev, UMC_HWIP, 0) < IP_VERSION(12, 0, 0)) { + xgpu_nv_send_access_requests(adev, IDH_RAS_POISON); + } else { + amdgpu_virt_fini_data_exchange(adev); + xgpu_nv_send_access_requests_with_param(adev, + IDH_RAS_POISON, block, 0, 0); + amdgpu_virt_init_data_exchange(adev); + } } const struct amdgpu_virt_ops xgpu_nv_virt_ops = { diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h index d0221ce087..1e8fd90cab 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h @@ -51,6 +51,7 @@ enum idh_event { IDH_FAIL, IDH_QUERY_ALIVE, IDH_REQ_GPU_INIT_DATA_READY, + IDH_RAS_POISON_READY, IDH_TEXT_MESSAGE = 255, }; diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c index de93614726..4178f4e5da 100644 --- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c @@ -728,8 +728,7 @@ static void navi10_ih_set_interrupt_funcs(struct amdgpu_device *adev) adev->irq.ih_funcs = &navi10_ih_funcs; } -const struct amdgpu_ip_block_version navi10_ih_ip_block = -{ +const struct amdgpu_ip_block_version navi10_ih_ip_block = { .type = AMD_IP_BLOCK_TYPE_IH, .major = 5, .minor = 0, diff --git a/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c b/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c new file mode 100644 index 0000000000..96ed00ac81 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c @@ -0,0 +1,495 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "amdgpu_atombios.h" +#include "nbif_v6_3_1.h" + +#include "nbif/nbif_6_3_1_offset.h" +#include "nbif/nbif_6_3_1_sh_mask.h" +#include "pcie/pcie_6_1_0_offset.h" +#include "pcie/pcie_6_1_0_sh_mask.h" +#include <uapi/linux/kfd_ioctl.h> + +static void nbif_v6_3_1_remap_hdp_registers(struct amdgpu_device *adev) +{ + WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_MEM_FLUSH_CNTL, + adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL); + WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_REG_FLUSH_CNTL, + adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL); +} + +static u32 nbif_v6_3_1_get_rev_id(struct amdgpu_device *adev) +{ + u32 tmp = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_DEV0_EPF0_STRAP0); + + tmp &= RCC_STRAP0_RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK; + tmp >>= RCC_STRAP0_RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT; + + return tmp; +} + +static void nbif_v6_3_1_mc_access_enable(struct amdgpu_device *adev, bool enable) +{ + if (enable) + WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_FB_EN, + BIF_BX0_BIF_FB_EN__FB_READ_EN_MASK | + BIF_BX0_BIF_FB_EN__FB_WRITE_EN_MASK); + else + WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_FB_EN, 0); +} + +static u32 nbif_v6_3_1_get_memsize(struct amdgpu_device *adev) +{ + return RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF0_RCC_CONFIG_MEMSIZE); +} + +static void nbif_v6_3_1_sdma_doorbell_range(struct amdgpu_device *adev, + int instance, bool use_doorbell, + int doorbell_index, + int doorbell_size) +{ + if (instance == 0) { + u32 doorbell_range = RREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL); + + if (use_doorbell) { + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL, + S2A_DOORBELL_PORT2_ENABLE, + 0x1); + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL, + S2A_DOORBELL_PORT2_AWID, + 0xe); + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL, + S2A_DOORBELL_PORT2_RANGE_OFFSET, + doorbell_index); + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL, + S2A_DOORBELL_PORT2_RANGE_SIZE, + doorbell_size); + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL, + S2A_DOORBELL_PORT2_AWADDR_31_28_VALUE, + 0x3); + } else + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL, + S2A_DOORBELL_PORT2_RANGE_SIZE, + 0); + + WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL, doorbell_range); + } +} + +static void nbif_v6_3_1_vcn_doorbell_range(struct amdgpu_device *adev, + bool use_doorbell, int doorbell_index, + int instance) +{ + u32 doorbell_range; + + if (instance) + doorbell_range = RREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_5_CTRL); + else + doorbell_range = RREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL); + + if (use_doorbell) { + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL, + S2A_DOORBELL_PORT4_ENABLE, + 0x1); + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL, + S2A_DOORBELL_PORT4_AWID, + instance ? 0x7 : 0x4); + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL, + S2A_DOORBELL_PORT4_RANGE_OFFSET, + doorbell_index); + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL, + S2A_DOORBELL_PORT4_RANGE_SIZE, + 8); + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL, + S2A_DOORBELL_PORT4_AWADDR_31_28_VALUE, + instance ? 0x7 : 0x4); + } else + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL, + S2A_DOORBELL_PORT4_RANGE_SIZE, + 0); + + if (instance) + WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_5_CTRL, doorbell_range); + else + WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL, doorbell_range); +} + +static void nbif_v6_3_1_gc_doorbell_init(struct amdgpu_device *adev) +{ + WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_0_CTRL, 0x30000007); + WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_3_CTRL, 0x3000000d); +} + +static void nbif_v6_3_1_enable_doorbell_aperture(struct amdgpu_device *adev, + bool enable) +{ + WREG32_FIELD15_PREREG(NBIO, 0, RCC_DEV0_EPF0_RCC_DOORBELL_APER_EN, + BIF_DOORBELL_APER_EN, enable ? 1 : 0); +} + +static void +nbif_v6_3_1_enable_doorbell_selfring_aperture(struct amdgpu_device *adev, + bool enable) +{ + u32 tmp = 0; + + if (enable) { + tmp = REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, + DOORBELL_SELFRING_GPA_APER_EN, 1) | + REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, + DOORBELL_SELFRING_GPA_APER_MODE, 1) | + REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, + DOORBELL_SELFRING_GPA_APER_SIZE, 0); + + WREG32_SOC15(NBIO, 0, regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_LOW, + lower_32_bits(adev->doorbell.base)); + WREG32_SOC15(NBIO, 0, regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_HIGH, + upper_32_bits(adev->doorbell.base)); + } + + WREG32_SOC15(NBIO, 0, regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, tmp); +} + +static void nbif_v6_3_1_ih_doorbell_range(struct amdgpu_device *adev, + bool use_doorbell, int doorbell_index) +{ + u32 ih_doorbell_range = RREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL); + + if (use_doorbell) { + ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL, + S2A_DOORBELL_PORT1_ENABLE, + 0x1); + ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL, + S2A_DOORBELL_PORT1_AWID, + 0x0); + ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL, + S2A_DOORBELL_PORT1_RANGE_OFFSET, + doorbell_index); + ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL, + S2A_DOORBELL_PORT1_RANGE_SIZE, + 2); + ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL, + S2A_DOORBELL_PORT1_AWADDR_31_28_VALUE, + 0x0); + } else + ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL, + S2A_DOORBELL_PORT1_RANGE_SIZE, + 0); + + WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL, ih_doorbell_range); +} + +static void nbif_v6_3_1_ih_control(struct amdgpu_device *adev) +{ + u32 interrupt_cntl; + + /* setup interrupt control */ + WREG32_SOC15(NBIO, 0, regBIF_BX0_INTERRUPT_CNTL2, adev->dummy_page_addr >> 8); + + interrupt_cntl = RREG32_SOC15(NBIO, 0, regBIF_BX0_INTERRUPT_CNTL); + /* + * BIF_BX0_INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi + * BIF_BX0_INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN + */ + interrupt_cntl = REG_SET_FIELD(interrupt_cntl, BIF_BX0_INTERRUPT_CNTL, + IH_DUMMY_RD_OVERRIDE, 0); + + /* BIF_BX0_INTERRUPT_CNTL__IH_REQ_NONSNOOP_EN_MASK=1 if ring is in non-cacheable memory, e.g., vram */ + interrupt_cntl = REG_SET_FIELD(interrupt_cntl, BIF_BX0_INTERRUPT_CNTL, + IH_REQ_NONSNOOP_EN, 0); + + WREG32_SOC15(NBIO, 0, regBIF_BX0_INTERRUPT_CNTL, interrupt_cntl); +} + +static void +nbif_v6_3_1_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ +} + +static void +nbif_v6_3_1_update_medium_grain_light_sleep(struct amdgpu_device *adev, + bool enable) +{ +} + +static void +nbif_v6_3_1_get_clockgating_state(struct amdgpu_device *adev, + u64 *flags) +{ +} + +static u32 nbif_v6_3_1_get_hdp_flush_req_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_GPU_HDP_FLUSH_REQ); +} + +static u32 nbif_v6_3_1_get_hdp_flush_done_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_GPU_HDP_FLUSH_DONE); +} + +static u32 nbif_v6_3_1_get_pcie_index_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_RSMU_INDEX); +} + +static u32 nbif_v6_3_1_get_pcie_data_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_RSMU_DATA); +} + +const struct nbio_hdp_flush_reg nbif_v6_3_1_hdp_flush_reg = { + .ref_and_mask_cp0 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP0_MASK, + .ref_and_mask_cp1 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP1_MASK, + .ref_and_mask_cp2 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP2_MASK, + .ref_and_mask_cp3 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP3_MASK, + .ref_and_mask_cp4 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP4_MASK, + .ref_and_mask_cp5 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP5_MASK, + .ref_and_mask_cp6 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP6_MASK, + .ref_and_mask_cp7 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP7_MASK, + .ref_and_mask_cp8 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP8_MASK, + .ref_and_mask_cp9 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP9_MASK, + .ref_and_mask_sdma0 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__SDMA0_MASK, + .ref_and_mask_sdma1 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__SDMA1_MASK, +}; + +static void nbif_v6_3_1_init_registers(struct amdgpu_device *adev) +{ + uint32_t data; + + data = RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF2_STRAP2); + data &= ~RCC_DEV0_EPF2_STRAP2__STRAP_NO_SOFT_RESET_DEV0_F2_MASK; + WREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF2_STRAP2, data); +} + +static u32 nbif_v6_3_1_get_rom_offset(struct amdgpu_device *adev) +{ + u32 data, rom_offset; + + data = RREG32_SOC15(NBIO, 0, regREGS_ROM_OFFSET_CTRL); + rom_offset = REG_GET_FIELD(data, REGS_ROM_OFFSET_CTRL, ROM_OFFSET); + + return rom_offset; +} + +#ifdef CONFIG_PCIEASPM +static void nbif_v6_3_1_program_ltr(struct amdgpu_device *adev) +{ + uint32_t def, data; + + def = RREG32_SOC15(NBIO, 0, regRCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL); + data = 0x35EB; + data &= ~RCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL__LTR_PRIV_MSG_DIS_IN_PM_NON_D0_MASK; + data &= ~RCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL__LTR_PRIV_RST_LTR_IN_DL_DOWN_MASK; + if (def != data) + WREG32_SOC15(NBIO, 0, regRCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL, data); + + def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP2); + data &= ~RCC_STRAP0_RCC_BIF_STRAP2__STRAP_LTR_IN_ASPML1_DIS_MASK; + if (def != data) + WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP2, data); + + def = data = RREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_DEVICE_CNTL2); + if (adev->pdev->ltr_path) + data |= BIF_CFG_DEV0_EPF0_DEVICE_CNTL2__LTR_EN_MASK; + else + data &= ~BIF_CFG_DEV0_EPF0_DEVICE_CNTL2__LTR_EN_MASK; + if (def != data) + WREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data); +} +#endif + +static void nbif_v6_3_1_program_aspm(struct amdgpu_device *adev) +{ +#ifdef CONFIG_PCIEASPM + uint32_t def, data; + + def = data = RREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL); + data &= ~PCIE_LC_CNTL__LC_L1_INACTIVITY_MASK; + data &= ~PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK; + data |= PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK; + if (def != data) + WREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL, data); + + def = data = RREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL7); + data |= PCIE_LC_CNTL7__LC_NBIF_ASPM_INPUT_EN_MASK; + if (def != data) + WREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL7, data); + + def = data = RREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL3); + data |= PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK; + if (def != data) + WREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL3, data); + + def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP3); + data &= ~RCC_STRAP0_RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER_MASK; + data &= ~RCC_STRAP0_RCC_BIF_STRAP3__STRAP_VLINK_PM_L1_ENTRY_TIMER_MASK; + if (def != data) + WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP3, data); + + def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP5); + data &= ~RCC_STRAP0_RCC_BIF_STRAP5__STRAP_VLINK_LDN_ENTRY_TIMER_MASK; + if (def != data) + WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP5, data); + + def = data = RREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_DEVICE_CNTL2); + data &= ~BIF_CFG_DEV0_EPF0_DEVICE_CNTL2__LTR_EN_MASK; + if (def != data) + WREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data); + + WREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_PCIE_LTR_CAP, 0x10011001); + +#if 0 + /* regPSWUSP0_PCIE_LC_CNTL2 should be replace by PCIE_LC_CNTL2 or someone else ? */ + def = data = RREG32_SOC15(NBIO, 0, regPSWUSP0_PCIE_LC_CNTL2); + data |= PSWUSP0_PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L1_MASK | + PSWUSP0_PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L23_MASK; + data &= ~PSWUSP0_PCIE_LC_CNTL2__LC_RCV_L0_TO_RCV_L0S_DIS_MASK; + if (def != data) + WREG32_SOC15(NBIO, 0, regPSWUSP0_PCIE_LC_CNTL2, data); +#endif + def = data = RREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL4); + data |= PCIE_LC_CNTL4__LC_L1_POWERDOWN_MASK; + if (def != data) + WREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL4, data); + + def = data = RREG32_SOC15(PCIE, 0, regPCIE_LC_RXRECOVER_RXSTANDBY_CNTL); + data |= PCIE_LC_RXRECOVER_RXSTANDBY_CNTL__LC_RX_L0S_STANDBY_EN_MASK; + if (def != data) + WREG32_SOC15(PCIE, 0, regPCIE_LC_RXRECOVER_RXSTANDBY_CNTL, data); + + nbif_v6_3_1_program_ltr(adev); + + def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP3); + data |= 0x5DE0 << RCC_STRAP0_RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT; + data |= 0x0010 << RCC_STRAP0_RCC_BIF_STRAP3__STRAP_VLINK_PM_L1_ENTRY_TIMER__SHIFT; + if (def != data) + WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP3, data); + + def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP5); + data |= 0x0010 << RCC_STRAP0_RCC_BIF_STRAP5__STRAP_VLINK_LDN_ENTRY_TIMER__SHIFT; + if (def != data) + WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP5, data); + + def = data = RREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL); + data |= 0x0 << PCIE_LC_CNTL__LC_L0S_INACTIVITY__SHIFT; + data |= 0x9 << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT; + data &= ~PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK; + if (def != data) + WREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL, data); + + def = data = RREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL3); + data &= ~PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK; + if (def != data) + WREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL3, data); +#endif +} + +const struct amdgpu_nbio_funcs nbif_v6_3_1_funcs = { + .get_hdp_flush_req_offset = nbif_v6_3_1_get_hdp_flush_req_offset, + .get_hdp_flush_done_offset = nbif_v6_3_1_get_hdp_flush_done_offset, + .get_pcie_index_offset = nbif_v6_3_1_get_pcie_index_offset, + .get_pcie_data_offset = nbif_v6_3_1_get_pcie_data_offset, + .get_rev_id = nbif_v6_3_1_get_rev_id, + .mc_access_enable = nbif_v6_3_1_mc_access_enable, + .get_memsize = nbif_v6_3_1_get_memsize, + .sdma_doorbell_range = nbif_v6_3_1_sdma_doorbell_range, + .vcn_doorbell_range = nbif_v6_3_1_vcn_doorbell_range, + .gc_doorbell_init = nbif_v6_3_1_gc_doorbell_init, + .enable_doorbell_aperture = nbif_v6_3_1_enable_doorbell_aperture, + .enable_doorbell_selfring_aperture = nbif_v6_3_1_enable_doorbell_selfring_aperture, + .ih_doorbell_range = nbif_v6_3_1_ih_doorbell_range, + .update_medium_grain_clock_gating = nbif_v6_3_1_update_medium_grain_clock_gating, + .update_medium_grain_light_sleep = nbif_v6_3_1_update_medium_grain_light_sleep, + .get_clockgating_state = nbif_v6_3_1_get_clockgating_state, + .ih_control = nbif_v6_3_1_ih_control, + .init_registers = nbif_v6_3_1_init_registers, + .remap_hdp_registers = nbif_v6_3_1_remap_hdp_registers, + .get_rom_offset = nbif_v6_3_1_get_rom_offset, + .program_aspm = nbif_v6_3_1_program_aspm, +}; + + +static void nbif_v6_3_1_sriov_ih_doorbell_range(struct amdgpu_device *adev, + bool use_doorbell, int doorbell_index) +{ +} + +static void nbif_v6_3_1_sriov_sdma_doorbell_range(struct amdgpu_device *adev, + int instance, bool use_doorbell, + int doorbell_index, + int doorbell_size) +{ +} + +static void nbif_v6_3_1_sriov_vcn_doorbell_range(struct amdgpu_device *adev, + bool use_doorbell, + int doorbell_index, int instance) +{ +} + +static void nbif_v6_3_1_sriov_gc_doorbell_init(struct amdgpu_device *adev) +{ +} + +const struct amdgpu_nbio_funcs nbif_v6_3_1_sriov_funcs = { + .get_hdp_flush_req_offset = nbif_v6_3_1_get_hdp_flush_req_offset, + .get_hdp_flush_done_offset = nbif_v6_3_1_get_hdp_flush_done_offset, + .get_pcie_index_offset = nbif_v6_3_1_get_pcie_index_offset, + .get_pcie_data_offset = nbif_v6_3_1_get_pcie_data_offset, + .get_rev_id = nbif_v6_3_1_get_rev_id, + .mc_access_enable = nbif_v6_3_1_mc_access_enable, + .get_memsize = nbif_v6_3_1_get_memsize, + .sdma_doorbell_range = nbif_v6_3_1_sriov_sdma_doorbell_range, + .vcn_doorbell_range = nbif_v6_3_1_sriov_vcn_doorbell_range, + .gc_doorbell_init = nbif_v6_3_1_sriov_gc_doorbell_init, + .enable_doorbell_aperture = nbif_v6_3_1_enable_doorbell_aperture, + .enable_doorbell_selfring_aperture = nbif_v6_3_1_enable_doorbell_selfring_aperture, + .ih_doorbell_range = nbif_v6_3_1_sriov_ih_doorbell_range, + .update_medium_grain_clock_gating = nbif_v6_3_1_update_medium_grain_clock_gating, + .update_medium_grain_light_sleep = nbif_v6_3_1_update_medium_grain_light_sleep, + .get_clockgating_state = nbif_v6_3_1_get_clockgating_state, + .ih_control = nbif_v6_3_1_ih_control, + .init_registers = nbif_v6_3_1_init_registers, + .remap_hdp_registers = nbif_v6_3_1_remap_hdp_registers, + .get_rom_offset = nbif_v6_3_1_get_rom_offset, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.h b/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.h new file mode 100644 index 0000000000..b7f2e0d889 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.h @@ -0,0 +1,33 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __NBIO_V6_3_1_H__ +#define __NBIO_V6_3_1_H__ + +#include "soc15_common.h" + +extern const struct nbio_hdp_flush_reg nbif_v6_3_1_hdp_flush_reg; +extern const struct amdgpu_nbio_funcs nbif_v6_3_1_funcs; +extern const struct amdgpu_nbio_funcs nbif_v6_3_1_sriov_funcs; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c index 1f52b4b1db..05020141c0 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c @@ -89,7 +89,9 @@ static void nbio_v7_11_vpe_doorbell_range(struct amdgpu_device *adev, int instan bool use_doorbell, int doorbell_index, int doorbell_size) { - u32 reg = SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_VPE_DOORBELL_RANGE); + u32 reg = instance == 0 ? + SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_VPE_DOORBELL_RANGE) : + SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_VPE1_DOORBELL_RANGE); u32 doorbell_range = RREG32_PCIE_PORT(reg); if (use_doorbell) { @@ -112,7 +114,10 @@ static void nbio_v7_11_vcn_doorbell_range(struct amdgpu_device *adev, bool use_doorbell, int doorbell_index, int instance) { - u32 reg = SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_VCN0_DOORBELL_RANGE); + u32 reg = instance == 0 ? + SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_VCN0_DOORBELL_RANGE): + SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_VCN1_DOORBELL_RANGE); + u32 doorbell_range = RREG32_PCIE_PORT(reg); if (use_doorbell) { diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c index b4723d68ea..40d1e209ea 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c @@ -35,15 +35,6 @@ /* Core 0 Port 0 counter */ #define smnPCIEP_NAK_COUNTER 0x1A340218 -#define smnPCIE_PERF_CNTL_TXCLK3 0x1A38021c -#define smnPCIE_PERF_CNTL_TXCLK7 0x1A380888 -#define smnPCIE_PERF_COUNT_CNTL 0x1A380200 -#define smnPCIE_PERF_COUNT0_TXCLK3 0x1A380220 -#define smnPCIE_PERF_COUNT0_TXCLK7 0x1A38088C -#define smnPCIE_PERF_COUNT0_UPVAL_TXCLK3 0x1A3808F8 -#define smnPCIE_PERF_COUNT0_UPVAL_TXCLK7 0x1A380918 - - static void nbio_v7_9_remap_hdp_registers(struct amdgpu_device *adev) { WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_MEM_FLUSH_CNTL, @@ -484,59 +475,6 @@ static u64 nbio_v7_9_get_pcie_replay_count(struct amdgpu_device *adev) return (nak_r + nak_g); } -static void nbio_v7_9_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0, - uint64_t *count1) -{ - uint32_t perfctrrx = 0; - uint32_t perfctrtx = 0; - - /* This reports 0 on APUs, so return to avoid writing/reading registers - * that may or may not be different from their GPU counterparts - */ - if (adev->flags & AMD_IS_APU) - return; - - /* Use TXCLK3 counter group for rx event */ - /* Use TXCLK7 counter group for tx event */ - /* Set the 2 events that we wish to watch, defined above */ - /* 40 is event# for received msgs */ - /* 2 is event# of posted requests sent */ - perfctrrx = REG_SET_FIELD(perfctrrx, PCIE_PERF_CNTL_TXCLK3, EVENT0_SEL, 40); - perfctrtx = REG_SET_FIELD(perfctrtx, PCIE_PERF_CNTL_TXCLK7, EVENT0_SEL, 2); - - /* Write to enable desired perf counters */ - WREG32_PCIE(smnPCIE_PERF_CNTL_TXCLK3, perfctrrx); - WREG32_PCIE(smnPCIE_PERF_CNTL_TXCLK7, perfctrtx); - - /* Zero out and enable SHADOW_WR - * Write 0x6: - * Bit 1 = Global Shadow wr(1) - * Bit 2 = Global counter reset enable(1) - */ - WREG32_PCIE(smnPCIE_PERF_COUNT_CNTL, 0x00000006); - - /* Enable Gloabl Counter - * Write 0x1: - * Bit 0 = Global Counter Enable(1) - */ - WREG32_PCIE(smnPCIE_PERF_COUNT_CNTL, 0x00000001); - - msleep(1000); - - /* Disable Global Counter, Reset and enable SHADOW_WR - * Write 0x6: - * Bit 1 = Global Shadow wr(1) - * Bit 2 = Global counter reset enable(1) - */ - WREG32_PCIE(smnPCIE_PERF_COUNT_CNTL, 0x00000006); - - /* Get the upper and lower count */ - *count0 = RREG32_PCIE(smnPCIE_PERF_COUNT0_TXCLK3) | - ((uint64_t)RREG32_PCIE(smnPCIE_PERF_COUNT0_UPVAL_TXCLK3) << 32); - *count1 = RREG32_PCIE(smnPCIE_PERF_COUNT0_TXCLK7) | - ((uint64_t)RREG32_PCIE(smnPCIE_PERF_COUNT0_UPVAL_TXCLK7) << 32); -} - const struct amdgpu_nbio_funcs nbio_v7_9_funcs = { .get_hdp_flush_req_offset = nbio_v7_9_get_hdp_flush_req_offset, .get_hdp_flush_done_offset = nbio_v7_9_get_hdp_flush_done_offset, @@ -561,7 +499,6 @@ const struct amdgpu_nbio_funcs nbio_v7_9_funcs = { .get_memory_partition_mode = nbio_v7_9_get_memory_partition_mode, .init_registers = nbio_v7_9_init_registers, .get_pcie_replay_count = nbio_v7_9_get_pcie_replay_count, - .get_pcie_usage = nbio_v7_9_get_pcie_usage, }; static void nbio_v7_9_query_ras_error_count(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h index 4bb5e10217..7566973ed8 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h +++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h @@ -296,6 +296,7 @@ enum psp_gfx_fw_type { GFX_FW_TYPE_VPEC_FW1 = 100, /* VPEC FW1 To Save VPE */ GFX_FW_TYPE_VPEC_FW2 = 101, /* VPEC FW2 To Save VPE */ GFX_FW_TYPE_VPE = 102, + GFX_FW_TYPE_JPEG_RAM = 128, /**< JPEG Command buffer */ GFX_FW_TYPE_P2S_TABLE = 129, GFX_FW_TYPE_MAX }; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index efa37e3b79..2395f18569 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -506,7 +506,7 @@ static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops) * before training, and restore it after training to avoid * VRAM corruption. */ - sz = GDDR6_MEM_TRAINING_ENCROACHED_SIZE; + sz = BIST_MEM_TRAINING_ENCROACHED_SIZE; if (adev->gmc.visible_vram_size < sz || !adev->mman.aper_base_kaddr) { DRM_ERROR("visible_vram_size %llx or aper_base_kaddr %p is not initialized.\n", diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index df1844d080..0da50ea46e 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -27,6 +27,7 @@ #include "amdgpu_ucode.h" #include "soc15_common.h" #include "psp_v13_0.h" +#include "amdgpu_ras.h" #include "mp/mp_13_0_2_offset.h" #include "mp/mp_13_0_2_sh_mask.h" @@ -52,6 +53,8 @@ MODULE_FIRMWARE("amdgpu/psp_13_0_6_sos.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_6_ta.bin"); MODULE_FIRMWARE("amdgpu/psp_14_0_0_toc.bin"); MODULE_FIRMWARE("amdgpu/psp_14_0_0_ta.bin"); +MODULE_FIRMWARE("amdgpu/psp_14_0_1_toc.bin"); +MODULE_FIRMWARE("amdgpu/psp_14_0_1_ta.bin"); /* For large FW files the time to complete can be very long */ #define USBC_PD_POLLING_LIMIT_S 240 @@ -100,6 +103,7 @@ static int psp_v13_0_init_microcode(struct psp_context *psp) case IP_VERSION(13, 0, 8): case IP_VERSION(13, 0, 11): case IP_VERSION(14, 0, 0): + case IP_VERSION(14, 0, 1): err = psp_init_toc_microcode(psp, ucode_prefix); if (err) return err; @@ -187,11 +191,18 @@ static int psp_v13_0_wait_for_bootloader(struct psp_context *psp) static int psp_v13_0_wait_for_bootloader_steady_state(struct psp_context *psp) { struct amdgpu_device *adev = psp->adev; + int ret; if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6)) { - psp_v13_0_wait_for_vmbx_ready(psp); + ret = psp_v13_0_wait_for_vmbx_ready(psp); + if (ret) + amdgpu_ras_query_boot_status(adev, 4); + + ret = psp_v13_0_wait_for_bootloader(psp); + if (ret) + amdgpu_ras_query_boot_status(adev, 4); - return psp_v13_0_wait_for_bootloader(psp); + return ret; } return 0; @@ -553,7 +564,7 @@ static int psp_v13_0_memory_training(struct psp_context *psp, uint32_t ops) * before training, and restore it after training to avoid * VRAM corruption. */ - sz = GDDR6_MEM_TRAINING_ENCROACHED_SIZE; + sz = BIST_MEM_TRAINING_ENCROACHED_SIZE; if (adev->gmc.visible_vram_size < sz || !adev->mman.aper_base_kaddr) { dev_err(adev->dev, "visible_vram_size %llx or aper_base_kaddr %p is not initialized.\n", @@ -763,81 +774,28 @@ static int psp_v13_0_fatal_error_recovery_quirk(struct psp_context *psp) return 0; } - -static void psp_v13_0_boot_error_reporting(struct amdgpu_device *adev, - uint32_t inst, - uint32_t boot_error) -{ - uint32_t socket_id; - uint32_t aid_id; - uint32_t hbm_id; - uint32_t reg_data; - - socket_id = REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, SOCKET_ID); - aid_id = REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, AID_ID); - hbm_id = REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, HBM_ID); - - reg_data = RREG32_SOC15(MP0, inst, regMP0_SMN_C2PMSG_109); - dev_info(adev->dev, "socket: %d, aid: %d, firmware boot failed, fw status is 0x%x\n", - socket_id, aid_id, reg_data); - - if (REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, GPU_ERR_MEM_TRAINING)) - dev_info(adev->dev, "socket: %d, aid: %d, hbm: %d, memory training failed\n", - socket_id, aid_id, hbm_id); - - if (REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, GPU_ERR_FW_LOAD)) - dev_info(adev->dev, "socket: %d, aid: %d, firmware load failed at boot time\n", - socket_id, aid_id); - - if (REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, GPU_ERR_WAFL_LINK_TRAINING)) - dev_info(adev->dev, "socket: %d, aid: %d, wafl link training failed\n", - socket_id, aid_id); - - if (REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, GPU_ERR_XGMI_LINK_TRAINING)) - dev_info(adev->dev, "socket: %d, aid: %d, xgmi link training failed\n", - socket_id, aid_id); - - if (REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, GPU_ERR_USR_CP_LINK_TRAINING)) - dev_info(adev->dev, "socket: %d, aid: %d, usr cp link training failed\n", - socket_id, aid_id); - - if (REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, GPU_ERR_USR_DP_LINK_TRAINING)) - dev_info(adev->dev, "socket: %d, aid: %d, usr dp link training failed\n", - socket_id, aid_id); - - if (REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, GPU_ERR_HBM_MEM_TEST)) - dev_info(adev->dev, "socket: %d, aid: %d, hbm: %d, hbm memory test failed\n", - socket_id, aid_id, hbm_id); - - if (REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, GPU_ERR_HBM_BIST_TEST)) - dev_info(adev->dev, "socket: %d, aid: %d, hbm: %d, hbm bist test failed\n", - socket_id, aid_id, hbm_id); -} - -static int psp_v13_0_query_boot_status(struct psp_context *psp) +static bool psp_v13_0_get_ras_capability(struct psp_context *psp) { struct amdgpu_device *adev = psp->adev; - int inst_mask = adev->aid_mask; - uint32_t reg_data; - uint32_t i; - int ret = 0; + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + u32 reg_data; - if (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 6)) - return 0; + /* query ras cap should be done from host side */ + if (amdgpu_sriov_vf(adev)) + return false; - if (RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_59) < 0x00a10109) - return 0; + if (!con) + return false; - for_each_inst(i, inst_mask) { - reg_data = RREG32_SOC15(MP0, i, regMP0_SMN_C2PMSG_126); - if (!REG_GET_FIELD(reg_data, MP0_SMN_C2PMSG_126, BOOT_STATUS)) { - psp_v13_0_boot_error_reporting(adev, i, reg_data); - ret = -EINVAL; - break; - } + if ((amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6)) && + (!(adev->flags & AMD_IS_APU))) { + reg_data = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_127); + adev->ras_hw_enabled = (reg_data & GENMASK_ULL(23, 0)); + con->poison_supported = ((reg_data & GENMASK_ULL(24, 24)) >> 24) ? true : false; + return true; + } else { + return false; } - - return ret; } static const struct psp_funcs psp_v13_0_funcs = { @@ -862,7 +820,7 @@ static const struct psp_funcs psp_v13_0_funcs = { .update_spirom = psp_v13_0_update_spirom, .vbflash_stat = psp_v13_0_vbflash_status, .fatal_error_recovery_quirk = psp_v13_0_fatal_error_recovery_quirk, - .query_boot_status = psp_v13_0_query_boot_status, + .get_ras_capability = psp_v13_0_get_ras_capability, }; void psp_v13_0_set_psp_funcs(struct psp_context *psp) diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c new file mode 100644 index 0000000000..78a95f8f37 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c @@ -0,0 +1,672 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include <drm/drm_drv.h> +#include <linux/vmalloc.h> +#include "amdgpu.h" +#include "amdgpu_psp.h" +#include "amdgpu_ucode.h" +#include "soc15_common.h" +#include "psp_v14_0.h" + +#include "mp/mp_14_0_2_offset.h" +#include "mp/mp_14_0_2_sh_mask.h" + +MODULE_FIRMWARE("amdgpu/psp_14_0_2_sos.bin"); +MODULE_FIRMWARE("amdgpu/psp_14_0_3_sos.bin"); + +/* For large FW files the time to complete can be very long */ +#define USBC_PD_POLLING_LIMIT_S 240 + +/* Read USB-PD from LFB */ +#define GFX_CMD_USB_PD_USE_LFB 0x480 + +/* VBIOS gfl defines */ +#define MBOX_READY_MASK 0x80000000 +#define MBOX_STATUS_MASK 0x0000FFFF +#define MBOX_COMMAND_MASK 0x00FF0000 +#define MBOX_READY_FLAG 0x80000000 +#define C2PMSG_CMD_SPI_UPDATE_ROM_IMAGE_ADDR_LO 0x2 +#define C2PMSG_CMD_SPI_UPDATE_ROM_IMAGE_ADDR_HI 0x3 +#define C2PMSG_CMD_SPI_UPDATE_FLASH_IMAGE 0x4 + +/* memory training timeout define */ +#define MEM_TRAIN_SEND_MSG_TIMEOUT_US 3000000 + +static int psp_v14_0_init_microcode(struct psp_context *psp) +{ + struct amdgpu_device *adev = psp->adev; + char ucode_prefix[30]; + int err = 0; + + amdgpu_ucode_ip_version_decode(adev, MP0_HWIP, ucode_prefix, sizeof(ucode_prefix)); + + switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) { + case IP_VERSION(14, 0, 2): + case IP_VERSION(14, 0, 3): + err = psp_init_sos_microcode(psp, ucode_prefix); + if (err) + return err; + break; + default: + BUG(); + } + + return 0; +} + +static bool psp_v14_0_is_sos_alive(struct psp_context *psp) +{ + struct amdgpu_device *adev = psp->adev; + uint32_t sol_reg; + + sol_reg = RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_81); + + return sol_reg != 0x0; +} + +static int psp_v14_0_wait_for_bootloader(struct psp_context *psp) +{ + struct amdgpu_device *adev = psp->adev; + + int ret; + int retry_loop; + + for (retry_loop = 0; retry_loop < 10; retry_loop++) { + /* Wait for bootloader to signify that is + ready having bit 31 of C2PMSG_35 set to 1 */ + ret = psp_wait_for(psp, + SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_35), + 0x80000000, + 0x80000000, + false); + + if (ret == 0) + return 0; + } + + return ret; +} + +static int psp_v14_0_bootloader_load_component(struct psp_context *psp, + struct psp_bin_desc *bin_desc, + enum psp_bootloader_cmd bl_cmd) +{ + int ret; + uint32_t psp_gfxdrv_command_reg = 0; + struct amdgpu_device *adev = psp->adev; + + /* Check tOS sign of life register to confirm sys driver and sOS + * are already been loaded. + */ + if (psp_v14_0_is_sos_alive(psp)) + return 0; + + ret = psp_v14_0_wait_for_bootloader(psp); + if (ret) + return ret; + + memset(psp->fw_pri_buf, 0, PSP_1_MEG); + + /* Copy PSP KDB binary to memory */ + memcpy(psp->fw_pri_buf, bin_desc->start_addr, bin_desc->size_bytes); + + /* Provide the PSP KDB to bootloader */ + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_36, + (uint32_t)(psp->fw_pri_mc_addr >> 20)); + psp_gfxdrv_command_reg = bl_cmd; + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_35, + psp_gfxdrv_command_reg); + + ret = psp_v14_0_wait_for_bootloader(psp); + + return ret; +} + +static int psp_v14_0_bootloader_load_kdb(struct psp_context *psp) +{ + return psp_v14_0_bootloader_load_component(psp, &psp->kdb, PSP_BL__LOAD_KEY_DATABASE); +} + +static int psp_v14_0_bootloader_load_spl(struct psp_context *psp) +{ + return psp_v14_0_bootloader_load_component(psp, &psp->kdb, PSP_BL__LOAD_TOS_SPL_TABLE); +} + +static int psp_v14_0_bootloader_load_sysdrv(struct psp_context *psp) +{ + return psp_v14_0_bootloader_load_component(psp, &psp->sys, PSP_BL__LOAD_SYSDRV); +} + +static int psp_v14_0_bootloader_load_soc_drv(struct psp_context *psp) +{ + return psp_v14_0_bootloader_load_component(psp, &psp->soc_drv, PSP_BL__LOAD_SOCDRV); +} + +static int psp_v14_0_bootloader_load_intf_drv(struct psp_context *psp) +{ + return psp_v14_0_bootloader_load_component(psp, &psp->intf_drv, PSP_BL__LOAD_INTFDRV); +} + +static int psp_v14_0_bootloader_load_dbg_drv(struct psp_context *psp) +{ + return psp_v14_0_bootloader_load_component(psp, &psp->dbg_drv, PSP_BL__LOAD_DBGDRV); +} + +static int psp_v14_0_bootloader_load_ras_drv(struct psp_context *psp) +{ + return psp_v14_0_bootloader_load_component(psp, &psp->ras_drv, PSP_BL__LOAD_RASDRV); +} + + +static int psp_v14_0_bootloader_load_sos(struct psp_context *psp) +{ + int ret; + unsigned int psp_gfxdrv_command_reg = 0; + struct amdgpu_device *adev = psp->adev; + + /* Check sOS sign of life register to confirm sys driver and sOS + * are already been loaded. + */ + if (psp_v14_0_is_sos_alive(psp)) + return 0; + + ret = psp_v14_0_wait_for_bootloader(psp); + if (ret) + return ret; + + memset(psp->fw_pri_buf, 0, PSP_1_MEG); + + /* Copy Secure OS binary to PSP memory */ + memcpy(psp->fw_pri_buf, psp->sos.start_addr, psp->sos.size_bytes); + + /* Provide the PSP secure OS to bootloader */ + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_36, + (uint32_t)(psp->fw_pri_mc_addr >> 20)); + psp_gfxdrv_command_reg = PSP_BL__LOAD_SOSDRV; + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_35, + psp_gfxdrv_command_reg); + + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_81), + RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_81), + 0, true); + + return ret; +} + +static int psp_v14_0_ring_stop(struct psp_context *psp, + enum psp_ring_type ring_type) +{ + int ret = 0; + struct amdgpu_device *adev = psp->adev; + + if (amdgpu_sriov_vf(adev)) { + /* Write the ring destroy command*/ + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_101, + GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING); + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + /* Wait for response flag (bit 31) */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_101), + 0x80000000, 0x80000000, false); + } else { + /* Write the ring destroy command*/ + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_64, + GFX_CTRL_CMD_ID_DESTROY_RINGS); + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + /* Wait for response flag (bit 31) */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_64), + 0x80000000, 0x80000000, false); + } + + return ret; +} + +static int psp_v14_0_ring_create(struct psp_context *psp, + enum psp_ring_type ring_type) +{ + int ret = 0; + unsigned int psp_ring_reg = 0; + struct psp_ring *ring = &psp->km_ring; + struct amdgpu_device *adev = psp->adev; + + if (amdgpu_sriov_vf(adev)) { + ret = psp_v14_0_ring_stop(psp, ring_type); + if (ret) { + DRM_ERROR("psp_v14_0_ring_stop_sriov failed!\n"); + return ret; + } + + /* Write low address of the ring to C2PMSG_102 */ + psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_102, psp_ring_reg); + /* Write high address of the ring to C2PMSG_103 */ + psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_103, psp_ring_reg); + + /* Write the ring initialization command to C2PMSG_101 */ + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_101, + GFX_CTRL_CMD_ID_INIT_GPCOM_RING); + + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + + /* Wait for response flag (bit 31) in C2PMSG_101 */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_101), + 0x80000000, 0x8000FFFF, false); + + } else { + /* Wait for sOS ready for ring creation */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_64), + 0x80000000, 0x80000000, false); + if (ret) { + DRM_ERROR("Failed to wait for trust OS ready for ring creation\n"); + return ret; + } + + /* Write low address of the ring to C2PMSG_69 */ + psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_69, psp_ring_reg); + /* Write high address of the ring to C2PMSG_70 */ + psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_70, psp_ring_reg); + /* Write size of ring to C2PMSG_71 */ + psp_ring_reg = ring->ring_size; + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_71, psp_ring_reg); + /* Write the ring initialization command to C2PMSG_64 */ + psp_ring_reg = ring_type; + psp_ring_reg = psp_ring_reg << 16; + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_64, psp_ring_reg); + + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + + /* Wait for response flag (bit 31) in C2PMSG_64 */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_64), + 0x80000000, 0x8000FFFF, false); + } + + return ret; +} + +static int psp_v14_0_ring_destroy(struct psp_context *psp, + enum psp_ring_type ring_type) +{ + int ret = 0; + struct psp_ring *ring = &psp->km_ring; + struct amdgpu_device *adev = psp->adev; + + ret = psp_v14_0_ring_stop(psp, ring_type); + if (ret) + DRM_ERROR("Fail to stop psp ring\n"); + + amdgpu_bo_free_kernel(&adev->firmware.rbuf, + &ring->ring_mem_mc_addr, + (void **)&ring->ring_mem); + + return ret; +} + +static uint32_t psp_v14_0_ring_get_wptr(struct psp_context *psp) +{ + uint32_t data; + struct amdgpu_device *adev = psp->adev; + + if (amdgpu_sriov_vf(adev)) + data = RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_102); + else + data = RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_67); + + return data; +} + +static void psp_v14_0_ring_set_wptr(struct psp_context *psp, uint32_t value) +{ + struct amdgpu_device *adev = psp->adev; + + if (amdgpu_sriov_vf(adev)) { + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_102, value); + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_101, + GFX_CTRL_CMD_ID_CONSUME_CMD); + } else + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_67, value); +} + +static int psp_v14_0_memory_training_send_msg(struct psp_context *psp, int msg) +{ + int ret; + int i; + uint32_t data_32; + int max_wait; + struct amdgpu_device *adev = psp->adev; + + data_32 = (psp->mem_train_ctx.c2p_train_data_offset >> 20); + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_36, data_32); + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_35, msg); + + max_wait = MEM_TRAIN_SEND_MSG_TIMEOUT_US / adev->usec_timeout; + for (i = 0; i < max_wait; i++) { + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_35), + 0x80000000, 0x80000000, false); + if (ret == 0) + break; + } + if (i < max_wait) + ret = 0; + else + ret = -ETIME; + + dev_dbg(adev->dev, "training %s %s, cost %d @ %d ms\n", + (msg == PSP_BL__DRAM_SHORT_TRAIN) ? "short" : "long", + (ret == 0) ? "succeed" : "failed", + i, adev->usec_timeout/1000); + return ret; +} + + +static int psp_v14_0_memory_training(struct psp_context *psp, uint32_t ops) +{ + struct psp_memory_training_context *ctx = &psp->mem_train_ctx; + uint32_t *pcache = (uint32_t *)ctx->sys_cache; + struct amdgpu_device *adev = psp->adev; + uint32_t p2c_header[4]; + uint32_t sz; + void *buf; + int ret, idx; + + if (ctx->init == PSP_MEM_TRAIN_NOT_SUPPORT) { + dev_dbg(adev->dev, "Memory training is not supported.\n"); + return 0; + } else if (ctx->init != PSP_MEM_TRAIN_INIT_SUCCESS) { + dev_err(adev->dev, "Memory training initialization failure.\n"); + return -EINVAL; + } + + if (psp_v14_0_is_sos_alive(psp)) { + dev_dbg(adev->dev, "SOS is alive, skip memory training.\n"); + return 0; + } + + amdgpu_device_vram_access(adev, ctx->p2c_train_data_offset, p2c_header, sizeof(p2c_header), false); + dev_dbg(adev->dev, "sys_cache[%08x,%08x,%08x,%08x] p2c_header[%08x,%08x,%08x,%08x]\n", + pcache[0], pcache[1], pcache[2], pcache[3], + p2c_header[0], p2c_header[1], p2c_header[2], p2c_header[3]); + + if (ops & PSP_MEM_TRAIN_SEND_SHORT_MSG) { + dev_dbg(adev->dev, "Short training depends on restore.\n"); + ops |= PSP_MEM_TRAIN_RESTORE; + } + + if ((ops & PSP_MEM_TRAIN_RESTORE) && + pcache[0] != MEM_TRAIN_SYSTEM_SIGNATURE) { + dev_dbg(adev->dev, "sys_cache[0] is invalid, restore depends on save.\n"); + ops |= PSP_MEM_TRAIN_SAVE; + } + + if (p2c_header[0] == MEM_TRAIN_SYSTEM_SIGNATURE && + !(pcache[0] == MEM_TRAIN_SYSTEM_SIGNATURE && + pcache[3] == p2c_header[3])) { + dev_dbg(adev->dev, "sys_cache is invalid or out-of-date, need save training data to sys_cache.\n"); + ops |= PSP_MEM_TRAIN_SAVE; + } + + if ((ops & PSP_MEM_TRAIN_SAVE) && + p2c_header[0] != MEM_TRAIN_SYSTEM_SIGNATURE) { + dev_dbg(adev->dev, "p2c_header[0] is invalid, save depends on long training.\n"); + ops |= PSP_MEM_TRAIN_SEND_LONG_MSG; + } + + if (ops & PSP_MEM_TRAIN_SEND_LONG_MSG) { + ops &= ~PSP_MEM_TRAIN_SEND_SHORT_MSG; + ops |= PSP_MEM_TRAIN_SAVE; + } + + dev_dbg(adev->dev, "Memory training ops:%x.\n", ops); + + if (ops & PSP_MEM_TRAIN_SEND_LONG_MSG) { + /* + * Long training will encroach a certain amount on the bottom of VRAM; + * save the content from the bottom of VRAM to system memory + * before training, and restore it after training to avoid + * VRAM corruption. + */ + sz = BIST_MEM_TRAINING_ENCROACHED_SIZE; + + if (adev->gmc.visible_vram_size < sz || !adev->mman.aper_base_kaddr) { + dev_err(adev->dev, "visible_vram_size %llx or aper_base_kaddr %p is not initialized.\n", + adev->gmc.visible_vram_size, + adev->mman.aper_base_kaddr); + return -EINVAL; + } + + buf = vmalloc(sz); + if (!buf) { + dev_err(adev->dev, "failed to allocate system memory.\n"); + return -ENOMEM; + } + + if (drm_dev_enter(adev_to_drm(adev), &idx)) { + memcpy_fromio(buf, adev->mman.aper_base_kaddr, sz); + ret = psp_v14_0_memory_training_send_msg(psp, PSP_BL__DRAM_LONG_TRAIN); + if (ret) { + DRM_ERROR("Send long training msg failed.\n"); + vfree(buf); + drm_dev_exit(idx); + return ret; + } + + memcpy_toio(adev->mman.aper_base_kaddr, buf, sz); + adev->hdp.funcs->flush_hdp(adev, NULL); + vfree(buf); + drm_dev_exit(idx); + } else { + vfree(buf); + return -ENODEV; + } + } + + if (ops & PSP_MEM_TRAIN_SAVE) { + amdgpu_device_vram_access(psp->adev, ctx->p2c_train_data_offset, ctx->sys_cache, ctx->train_data_size, false); + } + + if (ops & PSP_MEM_TRAIN_RESTORE) { + amdgpu_device_vram_access(psp->adev, ctx->c2p_train_data_offset, ctx->sys_cache, ctx->train_data_size, true); + } + + if (ops & PSP_MEM_TRAIN_SEND_SHORT_MSG) { + ret = psp_v14_0_memory_training_send_msg(psp, (amdgpu_force_long_training > 0) ? + PSP_BL__DRAM_LONG_TRAIN : PSP_BL__DRAM_SHORT_TRAIN); + if (ret) { + dev_err(adev->dev, "send training msg failed.\n"); + return ret; + } + } + ctx->training_cnt++; + return 0; +} + +static int psp_v14_0_load_usbc_pd_fw(struct psp_context *psp, uint64_t fw_pri_mc_addr) +{ + struct amdgpu_device *adev = psp->adev; + uint32_t reg_status; + int ret, i = 0; + + /* + * LFB address which is aligned to 1MB address and has to be + * right-shifted by 20 so that LFB address can be passed on a 32-bit C2P + * register + */ + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_36, (fw_pri_mc_addr >> 20)); + + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_35), + 0x80000000, 0x80000000, false); + if (ret) + return ret; + + /* Fireup interrupt so PSP can pick up the address */ + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_35, (GFX_CMD_USB_PD_USE_LFB << 16)); + + /* FW load takes very long time */ + do { + msleep(1000); + reg_status = RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_35); + + if (reg_status & 0x80000000) + goto done; + + } while (++i < USBC_PD_POLLING_LIMIT_S); + + return -ETIME; +done: + + if ((reg_status & 0xFFFF) != 0) { + DRM_ERROR("Address load failed - MP0_SMN_C2PMSG_35.Bits [15:0] = %04x\n", + reg_status & 0xFFFF); + return -EIO; + } + + return 0; +} + +static int psp_v14_0_read_usbc_pd_fw(struct psp_context *psp, uint32_t *fw_ver) +{ + struct amdgpu_device *adev = psp->adev; + int ret; + + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_35, C2PMSG_CMD_GFX_USB_PD_FW_VER); + + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_35), + 0x80000000, 0x80000000, false); + if (!ret) + *fw_ver = RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_36); + + return ret; +} + +static int psp_v14_0_exec_spi_cmd(struct psp_context *psp, int cmd) +{ + uint32_t reg_status = 0, reg_val = 0; + struct amdgpu_device *adev = psp->adev; + int ret; + + /* clear MBX ready (MBOX_READY_MASK bit is 0) and set update command */ + reg_val |= (cmd << 16); + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_115, reg_val); + + /* Ring the doorbell */ + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_73, 1); + + if (cmd == C2PMSG_CMD_SPI_UPDATE_FLASH_IMAGE) + ret = psp_wait_for_spirom_update(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_115), + MBOX_READY_FLAG, MBOX_READY_MASK, PSP_SPIROM_UPDATE_TIMEOUT); + else + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_115), + MBOX_READY_FLAG, MBOX_READY_MASK, false); + + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_115), + MBOX_READY_FLAG, MBOX_READY_MASK, false); + if (ret) { + dev_err(adev->dev, "SPI cmd %x timed out, ret = %d", cmd, ret); + return ret; + } + + reg_status = RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_115); + if ((reg_status & 0xFFFF) != 0) { + dev_err(adev->dev, "SPI cmd %x failed, fail status = %04x\n", + cmd, reg_status & 0xFFFF); + return -EIO; + } + + return 0; +} + +static int psp_v14_0_update_spirom(struct psp_context *psp, + uint64_t fw_pri_mc_addr) +{ + struct amdgpu_device *adev = psp->adev; + int ret; + + /* Confirm PSP is ready to start */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_115), + MBOX_READY_FLAG, MBOX_READY_MASK, false); + if (ret) { + dev_err(adev->dev, "PSP Not ready to start processing, ret = %d", ret); + return ret; + } + + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_116, lower_32_bits(fw_pri_mc_addr)); + + ret = psp_v14_0_exec_spi_cmd(psp, C2PMSG_CMD_SPI_UPDATE_ROM_IMAGE_ADDR_LO); + if (ret) + return ret; + + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_116, upper_32_bits(fw_pri_mc_addr)); + + ret = psp_v14_0_exec_spi_cmd(psp, C2PMSG_CMD_SPI_UPDATE_ROM_IMAGE_ADDR_HI); + if (ret) + return ret; + + psp->vbflash_done = true; + + ret = psp_v14_0_exec_spi_cmd(psp, C2PMSG_CMD_SPI_UPDATE_FLASH_IMAGE); + if (ret) + return ret; + + return 0; +} + +static int psp_v14_0_vbflash_status(struct psp_context *psp) +{ + struct amdgpu_device *adev = psp->adev; + + return RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_115); +} + +static const struct psp_funcs psp_v14_0_funcs = { + .init_microcode = psp_v14_0_init_microcode, + .bootloader_load_kdb = psp_v14_0_bootloader_load_kdb, + .bootloader_load_spl = psp_v14_0_bootloader_load_spl, + .bootloader_load_sysdrv = psp_v14_0_bootloader_load_sysdrv, + .bootloader_load_soc_drv = psp_v14_0_bootloader_load_soc_drv, + .bootloader_load_intf_drv = psp_v14_0_bootloader_load_intf_drv, + .bootloader_load_dbg_drv = psp_v14_0_bootloader_load_dbg_drv, + .bootloader_load_ras_drv = psp_v14_0_bootloader_load_ras_drv, + .bootloader_load_sos = psp_v14_0_bootloader_load_sos, + .ring_create = psp_v14_0_ring_create, + .ring_stop = psp_v14_0_ring_stop, + .ring_destroy = psp_v14_0_ring_destroy, + .ring_get_wptr = psp_v14_0_ring_get_wptr, + .ring_set_wptr = psp_v14_0_ring_set_wptr, + .mem_training = psp_v14_0_memory_training, + .load_usbc_pd_fw = psp_v14_0_load_usbc_pd_fw, + .read_usbc_pd_fw = psp_v14_0_read_usbc_pd_fw, + .update_spirom = psp_v14_0_update_spirom, + .vbflash_stat = psp_v14_0_vbflash_status +}; + +void psp_v14_0_set_psp_funcs(struct psp_context *psp) +{ + psp->funcs = &psp_v14_0_funcs; +} diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v14_0.h b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.h new file mode 100644 index 0000000000..dd18ba2cfa --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.h @@ -0,0 +1,32 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __PSP_V14_0_H__ +#define __PSP_V14_0_H__ + +#include "amdgpu_psp.h" + +#define PSP_SPIROM_UPDATE_TIMEOUT 60000 /* 60s */ + +void psp_v14_0_set_psp_funcs(struct psp_context *psp); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 8d5d86675a..07e19caf2b 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -57,22 +57,19 @@ static void sdma_v2_4_set_irq_funcs(struct amdgpu_device *adev); MODULE_FIRMWARE("amdgpu/topaz_sdma.bin"); MODULE_FIRMWARE("amdgpu/topaz_sdma1.bin"); -static const u32 sdma_offsets[SDMA_MAX_INSTANCE] = -{ +static const u32 sdma_offsets[SDMA_MAX_INSTANCE] = { SDMA0_REGISTER_OFFSET, SDMA1_REGISTER_OFFSET }; -static const u32 golden_settings_iceland_a11[] = -{ +static const u32 golden_settings_iceland_a11[] = { mmSDMA0_CHICKEN_BITS, 0xfc910007, 0x00810007, mmSDMA0_CLK_CTRL, 0xff000fff, 0x00000000, mmSDMA1_CHICKEN_BITS, 0xfc910007, 0x00810007, mmSDMA1_CLK_CTRL, 0xff000fff, 0x00000000, }; -static const u32 iceland_mgcg_cgcg_init[] = -{ +static const u32 iceland_mgcg_cgcg_init[] = { mmSDMA0_CLK_CTRL, 0xff000ff0, 0x00000100, mmSDMA1_CLK_CTRL, 0xff000ff0, 0x00000100 }; @@ -142,7 +139,8 @@ static int sdma_v2_4_init_microcode(struct amdgpu_device *adev) case CHIP_TOPAZ: chip_name = "topaz"; break; - default: BUG(); + default: + BUG(); } for (i = 0; i < adev->sdma.num_instances; i++) { @@ -1258,8 +1256,7 @@ static void sdma_v2_4_set_vm_pte_funcs(struct amdgpu_device *adev) adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances; } -const struct amdgpu_ip_block_version sdma_v2_4_ip_block = -{ +const struct amdgpu_ip_block_version sdma_v2_4_ip_block = { .type = AMD_IP_BLOCK_TYPE_SDMA, .major = 2, .minor = 4, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 3d68dd5523..8f23176675 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -2021,6 +2021,9 @@ static int sdma_v4_0_process_trap_irq(struct amdgpu_device *adev, DRM_DEBUG("IH: SDMA trap\n"); instance = sdma_v4_0_irq_id_to_seq(entry->client_id); + if (instance < 0) + return instance; + switch (entry->ring_id) { case 0: amdgpu_fence_process(&adev->sdma.instance[instance].ring); @@ -2104,7 +2107,7 @@ static int sdma_v4_0_print_iv_entry(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry) { int instance; - struct amdgpu_task_info task_info; + struct amdgpu_task_info *task_info; u64 addr; instance = sdma_v4_0_irq_id_to_seq(entry->client_id); @@ -2116,15 +2119,20 @@ static int sdma_v4_0_print_iv_entry(struct amdgpu_device *adev, addr = (u64)entry->src_data[0] << 12; addr |= ((u64)entry->src_data[1] & 0xf) << 44; - memset(&task_info, 0, sizeof(struct amdgpu_task_info)); - amdgpu_vm_get_task_info(adev, entry->pasid, &task_info); - dev_dbg_ratelimited(adev->dev, - "[sdma%d] address:0x%016llx src_id:%u ring:%u vmid:%u " - "pasid:%u, for process %s pid %d thread %s pid %d\n", - instance, addr, entry->src_id, entry->ring_id, entry->vmid, - entry->pasid, task_info.process_name, task_info.tgid, - task_info.task_name, task_info.pid); + "[sdma%d] address:0x%016llx src_id:%u ring:%u vmid:%u pasid:%u\n", + instance, addr, entry->src_id, entry->ring_id, entry->vmid, + entry->pasid); + + task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid); + if (task_info) { + dev_dbg_ratelimited(adev->dev, + " for process %s pid %d thread %s pid %d\n", + task_info->process_name, task_info->tgid, + task_info->task_name, task_info->pid); + amdgpu_vm_put_task_info(task_info); + } + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index 328682cbb7..e708468ac5 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -45,6 +45,8 @@ MODULE_FIRMWARE("amdgpu/sdma_4_4_2.bin"); +#define mmSMNAID_AID0_MCA_SMU 0x03b30400 + #define WREG32_SDMA(instance, offset, value) \ WREG32(sdma_v4_4_2_get_reg_offset(adev, (instance), (offset)), value) #define RREG32_SDMA(instance, offset) \ @@ -596,7 +598,7 @@ static uint32_t sdma_v4_4_2_rb_cntl(struct amdgpu_ring *ring, uint32_t rb_cntl) /* Set ring buffer size in dwords */ uint32_t rb_bufsz = order_base_2(ring->ring_size / 4); - barrier(); /* work around https://bugs.llvm.org/show_bug.cgi?id=42576 */ + barrier(); /* work around https://llvm.org/pr42576 */ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_GFX_RB_CNTL, RB_SIZE, rb_bufsz); #ifdef __BIG_ENDIAN rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_GFX_RB_CNTL, RB_SWAP_ENABLE, 1); @@ -1612,7 +1614,7 @@ static int sdma_v4_4_2_print_iv_entry(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry) { int instance; - struct amdgpu_task_info task_info; + struct amdgpu_task_info *task_info; u64 addr; instance = sdma_v4_4_2_irq_id_to_seq(entry->client_id); @@ -1624,15 +1626,19 @@ static int sdma_v4_4_2_print_iv_entry(struct amdgpu_device *adev, addr = (u64)entry->src_data[0] << 12; addr |= ((u64)entry->src_data[1] & 0xf) << 44; - memset(&task_info, 0, sizeof(struct amdgpu_task_info)); - amdgpu_vm_get_task_info(adev, entry->pasid, &task_info); - dev_dbg_ratelimited(adev->dev, - "[sdma%d] address:0x%016llx src_id:%u ring:%u vmid:%u " - "pasid:%u, for process %s pid %d thread %s pid %d\n", - instance, addr, entry->src_id, entry->ring_id, entry->vmid, - entry->pasid, task_info.process_name, task_info.tgid, - task_info.task_name, task_info.pid); + "[sdma%d] address:0x%016llx src_id:%u ring:%u vmid:%u pasid:%u\n", + instance, addr, entry->src_id, entry->ring_id, entry->vmid, + entry->pasid); + + task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid); + if (task_info) { + dev_dbg_ratelimited(adev->dev, " for process %s pid %d thread %s pid %d\n", + task_info->process_name, task_info->tgid, + task_info->task_name, task_info->pid); + amdgpu_vm_put_task_info(task_info); + } + return 0; } @@ -2174,9 +2180,79 @@ static const struct amdgpu_ras_block_hw_ops sdma_v4_4_2_ras_hw_ops = { .reset_ras_error_count = sdma_v4_4_2_reset_ras_error_count, }; +static int sdma_v4_4_2_aca_bank_generate_report(struct aca_handle *handle, + struct aca_bank *bank, enum aca_error_type type, + struct aca_bank_report *report, void *data) +{ + u64 status, misc0; + int ret; + + status = bank->regs[ACA_REG_IDX_STATUS]; + if ((type == ACA_ERROR_TYPE_UE && + ACA_REG__STATUS__ERRORCODEEXT(status) == ACA_EXTERROR_CODE_FAULT) || + (type == ACA_ERROR_TYPE_CE && + ACA_REG__STATUS__ERRORCODEEXT(status) == ACA_EXTERROR_CODE_CE)) { + + ret = aca_bank_info_decode(bank, &report->info); + if (ret) + return ret; + + misc0 = bank->regs[ACA_REG_IDX_MISC0]; + report->count[type] = ACA_REG__MISC0__ERRCNT(misc0); + } + + return 0; +} + +/* CODE_SDMA0 - CODE_SDMA4, reference to smu driver if header file */ +static int sdma_v4_4_2_err_codes[] = { 33, 34, 35, 36 }; + +static bool sdma_v4_4_2_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, + enum aca_error_type type, void *data) +{ + u32 instlo; + + instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]); + instlo &= GENMASK(31, 1); + + if (instlo != mmSMNAID_AID0_MCA_SMU) + return false; + + if (aca_bank_check_error_codes(handle->adev, bank, + sdma_v4_4_2_err_codes, + ARRAY_SIZE(sdma_v4_4_2_err_codes))) + return false; + + return true; +} + +static const struct aca_bank_ops sdma_v4_4_2_aca_bank_ops = { + .aca_bank_generate_report = sdma_v4_4_2_aca_bank_generate_report, + .aca_bank_is_valid = sdma_v4_4_2_aca_bank_is_valid, +}; + +static const struct aca_info sdma_v4_4_2_aca_info = { + .hwip = ACA_HWIP_TYPE_SMU, + .mask = ACA_ERROR_UE_MASK, + .bank_ops = &sdma_v4_4_2_aca_bank_ops, +}; + +static int sdma_v4_4_2_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) +{ + int r; + + r = amdgpu_sdma_ras_late_init(adev, ras_block); + if (r) + return r; + + return amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__SDMA, + &sdma_v4_4_2_aca_info, NULL); +} + static struct amdgpu_sdma_ras sdma_v4_4_2_ras = { .ras_block = { .hw_ops = &sdma_v4_4_2_ras_hw_ops, + .ras_late_init = sdma_v4_4_2_ras_late_init, }, }; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index 3c485e5a53..883e8a1b8a 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -249,35 +249,23 @@ static int sdma_v5_0_init_microcode(struct amdgpu_device *adev) return ret; } -static unsigned sdma_v5_0_ring_init_cond_exec(struct amdgpu_ring *ring) +static unsigned sdma_v5_0_ring_init_cond_exec(struct amdgpu_ring *ring, + uint64_t addr) { unsigned ret; amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_COND_EXE)); - amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); - amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); amdgpu_ring_write(ring, 1); - ret = ring->wptr & ring->buf_mask;/* this is the offset we need patch later */ - amdgpu_ring_write(ring, 0x55aa55aa);/* insert dummy here and patch it later */ + /* this is the offset we need patch later */ + ret = ring->wptr & ring->buf_mask; + /* insert dummy here and patch it later */ + amdgpu_ring_write(ring, 0); return ret; } -static void sdma_v5_0_ring_patch_cond_exec(struct amdgpu_ring *ring, - unsigned offset) -{ - unsigned cur; - - BUG_ON(offset > ring->buf_mask); - BUG_ON(ring->ring[offset] != 0x55aa55aa); - - cur = (ring->wptr - 1) & ring->buf_mask; - if (cur > offset) - ring->ring[offset] = cur - offset; - else - ring->ring[offset] = (ring->buf_mask + 1) - offset + cur; -} - /** * sdma_v5_0_ring_get_rptr - get the current read pointer * @@ -1780,7 +1768,6 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = { .emit_reg_wait = sdma_v5_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = sdma_v5_0_ring_emit_reg_write_reg_wait, .init_cond_exec = sdma_v5_0_ring_init_cond_exec, - .patch_cond_exec = sdma_v5_0_ring_patch_cond_exec, .preempt_ib = sdma_v5_0_ring_preempt_ib, }; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 46f9f58d84..da01b524b9 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -89,35 +89,23 @@ static u32 sdma_v5_2_get_reg_offset(struct amdgpu_device *adev, u32 instance, u3 return base + internal_offset; } -static unsigned sdma_v5_2_ring_init_cond_exec(struct amdgpu_ring *ring) +static unsigned sdma_v5_2_ring_init_cond_exec(struct amdgpu_ring *ring, + uint64_t addr) { unsigned ret; amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_COND_EXE)); - amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); - amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); amdgpu_ring_write(ring, 1); - ret = ring->wptr & ring->buf_mask;/* this is the offset we need patch later */ - amdgpu_ring_write(ring, 0x55aa55aa);/* insert dummy here and patch it later */ + /* this is the offset we need patch later */ + ret = ring->wptr & ring->buf_mask; + /* insert dummy here and patch it later */ + amdgpu_ring_write(ring, 0); return ret; } -static void sdma_v5_2_ring_patch_cond_exec(struct amdgpu_ring *ring, - unsigned offset) -{ - unsigned cur; - - BUG_ON(offset > ring->buf_mask); - BUG_ON(ring->ring[offset] != 0x55aa55aa); - - cur = (ring->wptr - 1) & ring->buf_mask; - if (cur > offset) - ring->ring[offset] = cur - offset; - else - ring->ring[offset] = (ring->buf_mask + 1) - offset + cur; -} - /** * sdma_v5_2_ring_get_rptr - get the current read pointer * @@ -1726,7 +1714,6 @@ static const struct amdgpu_ring_funcs sdma_v5_2_ring_funcs = { .emit_reg_wait = sdma_v5_2_ring_emit_reg_wait, .emit_reg_write_reg_wait = sdma_v5_2_ring_emit_reg_write_reg_wait, .init_cond_exec = sdma_v5_2_ring_init_cond_exec, - .patch_cond_exec = sdma_v5_2_ring_patch_cond_exec, .preempt_ib = sdma_v5_2_ring_preempt_ib, }; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index 3c7ddd219d..361835a61f 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -49,6 +49,7 @@ MODULE_FIRMWARE("amdgpu/sdma_6_0_1.bin"); MODULE_FIRMWARE("amdgpu/sdma_6_0_2.bin"); MODULE_FIRMWARE("amdgpu/sdma_6_0_3.bin"); MODULE_FIRMWARE("amdgpu/sdma_6_1_0.bin"); +MODULE_FIRMWARE("amdgpu/sdma_6_1_1.bin"); #define SDMA1_REG_OFFSET 0x600 #define SDMA0_HYP_DEC_REG_START 0x5880 @@ -79,35 +80,23 @@ static u32 sdma_v6_0_get_reg_offset(struct amdgpu_device *adev, u32 instance, u3 return base + internal_offset; } -static unsigned sdma_v6_0_ring_init_cond_exec(struct amdgpu_ring *ring) +static unsigned sdma_v6_0_ring_init_cond_exec(struct amdgpu_ring *ring, + uint64_t addr) { unsigned ret; amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COND_EXE)); - amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); - amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); amdgpu_ring_write(ring, 1); - ret = ring->wptr & ring->buf_mask;/* this is the offset we need patch later */ - amdgpu_ring_write(ring, 0x55aa55aa);/* insert dummy here and patch it later */ + /* this is the offset we need patch later */ + ret = ring->wptr & ring->buf_mask; + /* insert dummy here and patch it later */ + amdgpu_ring_write(ring, 0); return ret; } -static void sdma_v6_0_ring_patch_cond_exec(struct amdgpu_ring *ring, - unsigned offset) -{ - unsigned cur; - - BUG_ON(offset > ring->buf_mask); - BUG_ON(ring->ring[offset] != 0x55aa55aa); - - cur = (ring->wptr - 1) & ring->buf_mask; - if (cur > offset) - ring->ring[offset] = cur - offset; - else - ring->ring[offset] = (ring->buf_mask + 1) - offset + cur; -} - /** * sdma_v6_0_ring_get_rptr - get the current read pointer * @@ -1541,7 +1530,6 @@ static const struct amdgpu_ring_funcs sdma_v6_0_ring_funcs = { .emit_reg_wait = sdma_v6_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = sdma_v6_0_ring_emit_reg_write_reg_wait, .init_cond_exec = sdma_v6_0_ring_init_cond_exec, - .patch_cond_exec = sdma_v6_0_ring_patch_cond_exec, .preempt_ib = sdma_v6_0_ring_preempt_ib, }; diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index a757526153..23e4ef4fff 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -2331,28 +2331,18 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev) gpu_cfg & PCI_EXP_LNKCTL_HAWD); - pcie_capability_read_word(root, PCI_EXP_LNKCTL2, - &tmp16); - tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP | - PCI_EXP_LNKCTL2_TX_MARGIN); - tmp16 |= (bridge_cfg2 & - (PCI_EXP_LNKCTL2_ENTER_COMP | - PCI_EXP_LNKCTL2_TX_MARGIN)); - pcie_capability_write_word(root, - PCI_EXP_LNKCTL2, - tmp16); - - pcie_capability_read_word(adev->pdev, - PCI_EXP_LNKCTL2, - &tmp16); - tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP | - PCI_EXP_LNKCTL2_TX_MARGIN); - tmp16 |= (gpu_cfg2 & - (PCI_EXP_LNKCTL2_ENTER_COMP | - PCI_EXP_LNKCTL2_TX_MARGIN)); - pcie_capability_write_word(adev->pdev, - PCI_EXP_LNKCTL2, - tmp16); + pcie_capability_clear_and_set_word(root, PCI_EXP_LNKCTL2, + PCI_EXP_LNKCTL2_ENTER_COMP | + PCI_EXP_LNKCTL2_TX_MARGIN, + bridge_cfg2 & + (PCI_EXP_LNKCTL2_ENTER_COMP | + PCI_EXP_LNKCTL2_TX_MARGIN)); + pcie_capability_clear_and_set_word(adev->pdev, PCI_EXP_LNKCTL2, + PCI_EXP_LNKCTL2_ENTER_COMP | + PCI_EXP_LNKCTL2_TX_MARGIN, + gpu_cfg2 & + (PCI_EXP_LNKCTL2_ENTER_COMP | + PCI_EXP_LNKCTL2_TX_MARGIN)); tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4); tmp &= ~LC_SET_QUIESCE; @@ -2365,16 +2355,15 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev) speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE; WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl); - pcie_capability_read_word(adev->pdev, PCI_EXP_LNKCTL2, &tmp16); - tmp16 &= ~PCI_EXP_LNKCTL2_TLS; - + tmp16 = 0; if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) tmp16 |= PCI_EXP_LNKCTL2_TLS_8_0GT; /* gen3 */ else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2) tmp16 |= PCI_EXP_LNKCTL2_TLS_5_0GT; /* gen2 */ else tmp16 |= PCI_EXP_LNKCTL2_TLS_2_5GT; /* gen1 */ - pcie_capability_write_word(adev->pdev, PCI_EXP_LNKCTL2, tmp16); + pcie_capability_clear_and_set_word(adev->pdev, PCI_EXP_LNKCTL2, + PCI_EXP_LNKCTL2_TLS, tmp16); speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL); speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE; diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 1c614451de..dec81ccf62 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -918,7 +918,6 @@ static const struct amdgpu_asic_funcs aqua_vanjaram_asic_funcs = .get_config_memsize = &soc15_get_config_memsize, .need_full_reset = &soc15_need_full_reset, .init_doorbell_index = &aqua_vanjaram_doorbell_index_init, - .get_pcie_usage = &amdgpu_nbio_get_pcie_usage, .need_reset_on_init = &soc15_need_reset_on_init, .get_pcie_replay_count = &amdgpu_nbio_get_pcie_replay_count, .supports_baco = &soc15_supports_baco, @@ -1301,7 +1300,8 @@ static int soc15_common_hw_fini(void *handle) if (amdgpu_sriov_vf(adev)) xgpu_ai_mailbox_put_irq(adev); - if (adev->nbio.ras_if && + if ((!amdgpu_sriov_vf(adev)) && + adev->nbio.ras_if && amdgpu_ras_is_supported(adev, adev->nbio.ras_if->block)) { if (adev->nbio.ras && adev->nbio.ras->init_ras_controller_interrupt) diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index fd4505fa4f..43ca63fe85 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -185,6 +185,12 @@ static int soc21_query_video_codecs(struct amdgpu_device *adev, bool encode, } } return 0; + case IP_VERSION(4, 0, 6): + if (encode) + *codecs = &vcn_4_0_0_video_codecs_encode_vcn0; + else + *codecs = &vcn_4_0_0_video_codecs_decode_vcn0; + return 0; default: return -EINVAL; } @@ -382,6 +388,7 @@ soc21_asic_reset_method(struct amdgpu_device *adev) case IP_VERSION(13, 0, 4): case IP_VERSION(13, 0, 11): case IP_VERSION(14, 0, 0): + case IP_VERSION(14, 0, 1): return AMD_RESET_METHOD_MODE2; default: if (amdgpu_dpm_is_baco_supported(adev)) @@ -709,6 +716,7 @@ static int soc21_common_early_init(void *handle) AMD_CG_SUPPORT_BIF_MGCG | AMD_CG_SUPPORT_BIF_LS; adev->pg_flags = AMD_PG_SUPPORT_VCN_DPG | + AMD_PG_SUPPORT_JPEG_DPG | AMD_PG_SUPPORT_VCN | AMD_PG_SUPPORT_JPEG | AMD_PG_SUPPORT_GFX_PG; @@ -717,6 +725,35 @@ static int soc21_common_early_init(void *handle) else adev->external_rev_id = adev->rev_id + 0x10; break; + case IP_VERSION(11, 5, 1): + adev->cg_flags = + AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_CGLS | + AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_FGCG | + AMD_CG_SUPPORT_REPEATER_FGCG | + AMD_CG_SUPPORT_GFX_PERF_CLK | + AMD_CG_SUPPORT_GFX_3D_CGCG | + AMD_CG_SUPPORT_GFX_3D_CGLS | + AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_MC_LS | + AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_HDP_DS | + AMD_CG_SUPPORT_HDP_SD | + AMD_CG_SUPPORT_ATHUB_MGCG | + AMD_CG_SUPPORT_ATHUB_LS | + AMD_CG_SUPPORT_IH_CG | + AMD_CG_SUPPORT_BIF_MGCG | + AMD_CG_SUPPORT_BIF_LS | + AMD_CG_SUPPORT_VCN_MGCG | + AMD_CG_SUPPORT_JPEG_MGCG; + adev->pg_flags = + AMD_PG_SUPPORT_GFX_PG | + AMD_PG_SUPPORT_VCN | + AMD_PG_SUPPORT_VCN_DPG | + AMD_PG_SUPPORT_JPEG; + adev->external_rev_id = adev->rev_id + 0xc1; + break; default: /* FIXME: not supported yet */ return -EINVAL; @@ -891,6 +928,7 @@ static int soc21_common_set_clockgating_state(void *handle, case IP_VERSION(7, 7, 0): case IP_VERSION(7, 7, 1): case IP_VERSION(7, 11, 0): + case IP_VERSION(7, 11, 1): adev->nbio.funcs->update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE); adev->nbio.funcs->update_medium_grain_light_sleep(adev, diff --git a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h index 879bb7af29..056d4df8fa 100644 --- a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h +++ b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h @@ -36,6 +36,9 @@ enum ras_command { TA_RAS_COMMAND__ENABLE_FEATURES = 0, TA_RAS_COMMAND__DISABLE_FEATURES, TA_RAS_COMMAND__TRIGGER_ERROR, + TA_RAS_COMMAND__QUERY_BLOCK_INFO, + TA_RAS_COMMAND__QUERY_SUB_BLOCK_INFO, + TA_RAS_COMMAND__QUERY_ADDRESS, }; enum ta_ras_status { @@ -105,6 +108,11 @@ enum ta_ras_error_type { TA_RAS_ERROR__POISON = 8, }; +enum ta_ras_address_type { + TA_RAS_MCA_TO_PA, + TA_RAS_PA_TO_MCA, +}; + /* Input/output structures for RAS commands */ /**********************************************************/ @@ -133,12 +141,38 @@ struct ta_ras_init_flags { uint8_t channel_dis_num; }; +struct ta_ras_mca_addr { + uint64_t err_addr; + uint32_t ch_inst; + uint32_t umc_inst; + uint32_t node_inst; +}; + +struct ta_ras_phy_addr { + uint64_t pa; + uint32_t bank; + uint32_t channel_idx; +}; + +struct ta_ras_query_address_input { + enum ta_ras_address_type addr_type; + struct ta_ras_mca_addr ma; + struct ta_ras_phy_addr pa; +}; + struct ta_ras_output_flags { uint8_t ras_init_success_flag; uint8_t err_inject_switch_disable_flag; uint8_t reg_access_failure_flag; }; +struct ta_ras_query_address_output { + /* don't use the flags here */ + struct ta_ras_output_flags flags; + struct ta_ras_mca_addr ma; + struct ta_ras_phy_addr pa; +}; + /* Common input structure for RAS callbacks */ /**********************************************************/ union ta_ras_cmd_input { @@ -146,12 +180,14 @@ union ta_ras_cmd_input { struct ta_ras_enable_features_input enable_features; struct ta_ras_disable_features_input disable_features; struct ta_ras_trigger_error_input trigger_error; + struct ta_ras_query_address_input address; uint32_t reserve_pad[256]; }; union ta_ras_cmd_output { struct ta_ras_output_flags flags; + struct ta_ras_query_address_output address; uint32_t reserve_pad[256]; }; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c index 7458a218e8..77af4e25ff 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c @@ -89,12 +89,28 @@ static void umc_v12_0_reset_error_count(struct amdgpu_device *adev) umc_v12_0_reset_error_count_per_channel, NULL); } +bool umc_v12_0_is_deferred_error(struct amdgpu_device *adev, uint64_t mc_umc_status) +{ + dev_info(adev->dev, + "MCA_UMC_STATUS(0x%llx): Val:%llu, Poison:%llu, Deferred:%llu, PCC:%llu, UC:%llu, TCC:%llu\n", + mc_umc_status, + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val), + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Poison), + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred), + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC), + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC), + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) + ); + + return (amdgpu_ras_is_poison_mode_supported(adev) && + (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) && + (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1)); +} + bool umc_v12_0_is_uncorrectable_error(struct amdgpu_device *adev, uint64_t mc_umc_status) { - if (amdgpu_ras_is_poison_mode_supported(adev) && - (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) && - (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1)) - return true; + if (umc_v12_0_is_deferred_error(adev, mc_umc_status)) + return false; return ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) && (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 || @@ -104,9 +120,7 @@ bool umc_v12_0_is_uncorrectable_error(struct amdgpu_device *adev, uint64_t mc_um bool umc_v12_0_is_correctable_error(struct amdgpu_device *adev, uint64_t mc_umc_status) { - if (amdgpu_ras_is_poison_mode_supported(adev) && - (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) && - (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1)) + if (umc_v12_0_is_deferred_error(adev, mc_umc_status)) return false; return (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && @@ -119,9 +133,10 @@ bool umc_v12_0_is_correctable_error(struct amdgpu_device *adev, uint64_t mc_umc_ !(umc_v12_0_is_uncorrectable_error(adev, mc_umc_status))))); } -static void umc_v12_0_query_correctable_error_count(struct amdgpu_device *adev, +static void umc_v12_0_query_error_count_per_type(struct amdgpu_device *adev, uint64_t umc_reg_offset, - unsigned long *error_count) + unsigned long *error_count, + check_error_type_func error_type_func) { uint64_t mc_umc_status; uint64_t mc_umc_status_addr; @@ -129,31 +144,11 @@ static void umc_v12_0_query_correctable_error_count(struct amdgpu_device *adev, mc_umc_status_addr = SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0); - /* Rely on MCUMC_STATUS for correctable error counter - * MCUMC_STATUS is a 64 bit register - */ + /* Check MCUMC_STATUS */ mc_umc_status = RREG64_PCIE_EXT((mc_umc_status_addr + umc_reg_offset) * 4); - if (umc_v12_0_is_correctable_error(adev, mc_umc_status)) - *error_count += 1; -} - -static void umc_v12_0_query_uncorrectable_error_count(struct amdgpu_device *adev, - uint64_t umc_reg_offset, - unsigned long *error_count) -{ - uint64_t mc_umc_status; - uint64_t mc_umc_status_addr; - - mc_umc_status_addr = - SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0); - - /* Check the MCUMC_STATUS. */ - mc_umc_status = - RREG64_PCIE_EXT((mc_umc_status_addr + umc_reg_offset) * 4); - - if (umc_v12_0_is_uncorrectable_error(adev, mc_umc_status)) + if (error_type_func(adev, mc_umc_status)) *error_count += 1; } @@ -162,7 +157,7 @@ static int umc_v12_0_query_error_count(struct amdgpu_device *adev, uint32_t ch_inst, void *data) { struct ras_err_data *err_data = (struct ras_err_data *)data; - unsigned long ue_count = 0, ce_count = 0; + unsigned long ue_count = 0, ce_count = 0, de_count = 0; /* NOTE: node_inst is converted by adev->umc.active_mask and the range is [0-3], * which can be used as die ID directly */ @@ -174,11 +169,16 @@ static int umc_v12_0_query_error_count(struct amdgpu_device *adev, uint64_t umc_reg_offset = get_umc_v12_0_reg_offset(adev, node_inst, umc_inst, ch_inst); - umc_v12_0_query_correctable_error_count(adev, umc_reg_offset, &ce_count); - umc_v12_0_query_uncorrectable_error_count(adev, umc_reg_offset, &ue_count); + umc_v12_0_query_error_count_per_type(adev, umc_reg_offset, + &ce_count, umc_v12_0_is_correctable_error); + umc_v12_0_query_error_count_per_type(adev, umc_reg_offset, + &ue_count, umc_v12_0_is_uncorrectable_error); + umc_v12_0_query_error_count_per_type(adev, umc_reg_offset, + &de_count, umc_v12_0_is_deferred_error); amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, NULL, ue_count); amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, NULL, ce_count); + amdgpu_ras_error_statistic_de_count(err_data, &mcm_info, NULL, de_count); return 0; } @@ -203,14 +203,14 @@ static bool umc_v12_0_bit_wise_xor(uint32_t val) return result; } -static void umc_v12_0_convert_error_address(struct amdgpu_device *adev, - struct ras_err_data *err_data, uint64_t err_addr, - uint32_t ch_inst, uint32_t umc_inst, - uint32_t node_inst) +static void umc_v12_0_mca_addr_to_pa(struct amdgpu_device *adev, + uint64_t err_addr, uint32_t ch_inst, uint32_t umc_inst, + uint32_t node_inst, + struct ta_ras_query_address_output *addr_out) { uint32_t channel_index, i; - uint64_t soc_pa, na, retired_page, column; - uint32_t bank_hash0, bank_hash1, bank_hash2, bank_hash3, col, row, row_xor; + uint64_t na, soc_pa; + uint32_t bank_hash0, bank_hash1, bank_hash2, bank_hash3, col, row; uint32_t bank0, bank1, bank2, bank3, bank; bank_hash0 = (err_addr >> UMC_V12_0_MCA_B0_BIT) & 0x1ULL; @@ -260,12 +260,44 @@ static void umc_v12_0_convert_error_address(struct amdgpu_device *adev, /* the umc channel bits are not original values, they are hashed */ UMC_V12_0_SET_CHANNEL_HASH(channel_index, soc_pa); + addr_out->pa.pa = soc_pa; + addr_out->pa.bank = bank; + addr_out->pa.channel_idx = channel_index; +} + +static void umc_v12_0_convert_error_address(struct amdgpu_device *adev, + struct ras_err_data *err_data, uint64_t err_addr, + uint32_t ch_inst, uint32_t umc_inst, + uint32_t node_inst) +{ + uint32_t col, row, row_xor, bank, channel_index; + uint64_t soc_pa, retired_page, column; + struct ta_ras_query_address_input addr_in; + struct ta_ras_query_address_output addr_out; + + addr_in.addr_type = TA_RAS_MCA_TO_PA; + addr_in.ma.err_addr = err_addr; + addr_in.ma.ch_inst = ch_inst; + addr_in.ma.umc_inst = umc_inst; + addr_in.ma.node_inst = node_inst; + + if (psp_ras_query_address(&adev->psp, &addr_in, &addr_out)) + /* fallback to old path if fail to get pa from psp */ + umc_v12_0_mca_addr_to_pa(adev, err_addr, ch_inst, umc_inst, + node_inst, &addr_out); + + soc_pa = addr_out.pa.pa; + bank = addr_out.pa.bank; + channel_index = addr_out.pa.channel_idx; + + col = (err_addr >> 1) & 0x1fULL; + row = (err_addr >> 10) & 0x3fffULL; + row_xor = row ^ (0x1ULL << 13); /* clear [C3 C2] in soc physical address */ soc_pa &= ~(0x3ULL << UMC_V12_0_PA_C2_BIT); /* clear [C4] in soc physical address */ soc_pa &= ~(0x1ULL << UMC_V12_0_PA_C4_BIT); - row_xor = row ^ (0x1ULL << 13); /* loop for all possibilities of [C4 C3 C2] */ for (column = 0; column < UMC_V12_0_NA_MAP_PA_NUM; column++) { retired_page = soc_pa | ((column & 0x3) << UMC_V12_0_PA_C2_BIT); @@ -316,10 +348,8 @@ static int umc_v12_0_query_error_address(struct amdgpu_device *adev, } /* calculate error address if ue error is detected */ - if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && - REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, AddrV) == 1 && - REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1) { - + if (umc_v12_0_is_uncorrectable_error(adev, mc_umc_status) || + umc_v12_0_is_deferred_error(adev, mc_umc_status)) { mc_umc_addrt0 = SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_ADDRT0); @@ -385,45 +415,69 @@ static void umc_v12_0_ecc_info_query_ras_error_address(struct amdgpu_device *ade { struct ras_err_node *err_node; uint64_t mc_umc_status; + struct ras_err_info *err_info; + struct ras_err_addr *mca_err_addr, *tmp; struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; for_each_ras_error(err_node, err_data) { - mc_umc_status = err_node->err_info.err_addr.err_status; - if (!mc_umc_status) + err_info = &err_node->err_info; + if (list_empty(&err_info->err_addr_list)) continue; - if (umc_v12_0_is_uncorrectable_error(adev, mc_umc_status)) { - uint64_t mca_addr, err_addr, mca_ipid; - uint32_t InstanceIdLo; - struct amdgpu_smuio_mcm_config_info *mcm_info; - - mcm_info = &err_node->err_info.mcm_info; - mca_addr = err_node->err_info.err_addr.err_addr; - mca_ipid = err_node->err_info.err_addr.err_ipid; - - err_addr = REG_GET_FIELD(mca_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr); - InstanceIdLo = REG_GET_FIELD(mca_ipid, MCMP1_IPIDT0, InstanceIdLo); - - dev_info(adev->dev, "UMC:IPID:0x%llx, aid:%d, inst:%d, ch:%d, err_addr:0x%llx\n", - mca_ipid, - mcm_info->die_id, - MCA_IPID_LO_2_UMC_INST(InstanceIdLo), - MCA_IPID_LO_2_UMC_CH(InstanceIdLo), - err_addr); - - umc_v12_0_convert_error_address(adev, - err_data, err_addr, - MCA_IPID_LO_2_UMC_CH(InstanceIdLo), - MCA_IPID_LO_2_UMC_INST(InstanceIdLo), - mcm_info->die_id); - - /* Clear umc error address content */ - memset(&err_node->err_info.err_addr, - 0, sizeof(err_node->err_info.err_addr)); + list_for_each_entry_safe(mca_err_addr, tmp, &err_info->err_addr_list, node) { + mc_umc_status = mca_err_addr->err_status; + if (mc_umc_status && + (umc_v12_0_is_uncorrectable_error(adev, mc_umc_status) || + umc_v12_0_is_deferred_error(adev, mc_umc_status))) { + uint64_t mca_addr, err_addr, mca_ipid; + uint32_t InstanceIdLo; + + mca_addr = mca_err_addr->err_addr; + mca_ipid = mca_err_addr->err_ipid; + + err_addr = REG_GET_FIELD(mca_addr, + MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr); + InstanceIdLo = REG_GET_FIELD(mca_ipid, MCMP1_IPIDT0, InstanceIdLo); + + dev_info(adev->dev, "UMC:IPID:0x%llx, aid:%d, inst:%d, ch:%d, err_addr:0x%llx\n", + mca_ipid, + err_info->mcm_info.die_id, + MCA_IPID_LO_2_UMC_INST(InstanceIdLo), + MCA_IPID_LO_2_UMC_CH(InstanceIdLo), + err_addr); + + umc_v12_0_convert_error_address(adev, + err_data, err_addr, + MCA_IPID_LO_2_UMC_CH(InstanceIdLo), + MCA_IPID_LO_2_UMC_INST(InstanceIdLo), + err_info->mcm_info.die_id); + } + + /* Delete error address node from list and free memory */ + amdgpu_ras_del_mca_err_addr(err_info, mca_err_addr); } } } +static bool umc_v12_0_check_ecc_err_status(struct amdgpu_device *adev, + enum amdgpu_mca_error_type type, void *ras_error_status) +{ + uint64_t mc_umc_status = *(uint64_t *)ras_error_status; + + switch (type) { + case AMDGPU_MCA_ERROR_TYPE_UE: + return umc_v12_0_is_uncorrectable_error(adev, mc_umc_status); + case AMDGPU_MCA_ERROR_TYPE_CE: + return umc_v12_0_is_correctable_error(adev, mc_umc_status); + case AMDGPU_MCA_ERROR_TYPE_DE: + return umc_v12_0_is_deferred_error(adev, mc_umc_status); + default: + return false; + } + + return false; +} + static void umc_v12_0_err_cnt_init(struct amdgpu_device *adev) { amdgpu_umc_loop_channels(adev, @@ -444,12 +498,71 @@ const struct amdgpu_ras_block_hw_ops umc_v12_0_ras_hw_ops = { .query_ras_error_address = umc_v12_0_query_ras_error_address, }; +static int umc_v12_0_aca_bank_generate_report(struct aca_handle *handle, struct aca_bank *bank, enum aca_error_type type, + struct aca_bank_report *report, void *data) +{ + struct amdgpu_device *adev = handle->adev; + u64 status; + int ret; + + ret = aca_bank_info_decode(bank, &report->info); + if (ret) + return ret; + + status = bank->regs[ACA_REG_IDX_STATUS]; + switch (type) { + case ACA_ERROR_TYPE_UE: + if (umc_v12_0_is_uncorrectable_error(adev, status)) { + report->count[type] = 1; + } + break; + case ACA_ERROR_TYPE_CE: + if (umc_v12_0_is_correctable_error(adev, status)) { + report->count[type] = 1; + } + break; + default: + return -EINVAL; + } + + return 0; +} + +static const struct aca_bank_ops umc_v12_0_aca_bank_ops = { + .aca_bank_generate_report = umc_v12_0_aca_bank_generate_report, +}; + +const struct aca_info umc_v12_0_aca_info = { + .hwip = ACA_HWIP_TYPE_UMC, + .mask = ACA_ERROR_UE_MASK | ACA_ERROR_CE_MASK, + .bank_ops = &umc_v12_0_aca_bank_ops, +}; + +static int umc_v12_0_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) +{ + int ret; + + ret = amdgpu_umc_ras_late_init(adev, ras_block); + if (ret) + return ret; + + ret = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__UMC, + &umc_v12_0_aca_info, NULL); + if (ret) + return ret; + + return 0; +} + struct amdgpu_umc_ras umc_v12_0_ras = { .ras_block = { .hw_ops = &umc_v12_0_ras_hw_ops, + .ras_late_init = umc_v12_0_ras_late_init, }, .err_cnt_init = umc_v12_0_err_cnt_init, .query_ras_poison_mode = umc_v12_0_query_ras_poison_mode, .ecc_info_query_ras_error_count = umc_v12_0_ecc_info_query_ras_error_count, .ecc_info_query_ras_error_address = umc_v12_0_ecc_info_query_ras_error_address, + .check_ecc_err_status = umc_v12_0_check_ecc_err_status, }; + diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h index e8de3a9225..5973bfb14f 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h @@ -121,9 +121,12 @@ (((_ipid_lo) >> 12) & 0xF)) #define MCA_IPID_LO_2_UMC_INST(_ipid_lo) (((_ipid_lo) >> 21) & 0x7) +bool umc_v12_0_is_deferred_error(struct amdgpu_device *adev, uint64_t mc_umc_status); bool umc_v12_0_is_uncorrectable_error(struct amdgpu_device *adev, uint64_t mc_umc_status); bool umc_v12_0_is_correctable_error(struct amdgpu_device *adev, uint64_t mc_umc_status); +typedef bool (*check_error_type_func)(struct amdgpu_device *adev, uint64_t mc_umc_status); + extern const uint32_t umc_v12_0_channel_idx_tbl[] [UMC_V12_0_UMC_INSTANCE_NUM] diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_0.c index 0d6b50528d..97fa88ed77 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_0.c @@ -25,7 +25,7 @@ static void umc_v6_0_init_registers(struct amdgpu_device *adev) { - unsigned i,j; + unsigned i, j; for (i = 0; i < 4; i++) for (j = 0; j < 4; j++) diff --git a/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c b/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c index fd23cd3485..bd57896ab8 100644 --- a/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c @@ -60,7 +60,7 @@ static int umsch_mm_v4_0_load_microcode(struct amdgpu_umsch_mm *umsch) umsch->cmd_buf_curr_ptr = umsch->cmd_buf_ptr; - if (amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(4, 0, 5)) { + if (amdgpu_ip_version(adev, VCN_HWIP, 0) >= IP_VERSION(4, 0, 5)) { WREG32_SOC15(VCN, 0, regUVD_IPX_DLDO_CONFIG, 1 << UVD_IPX_DLDO_CONFIG__ONO0_PWR_CONFIG__SHIFT); SOC15_WAIT_ON_RREG(VCN, 0, regUVD_IPX_DLDO_STATUS, @@ -250,7 +250,7 @@ static int umsch_mm_v4_0_ring_stop(struct amdgpu_umsch_mm *umsch) data = REG_SET_FIELD(data, VCN_UMSCH_RB_DB_CTRL, EN, 0); WREG32_SOC15(VCN, 0, regVCN_UMSCH_RB_DB_CTRL, data); - if (amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(4, 0, 5)) { + if (amdgpu_ip_version(adev, VCN_HWIP, 0) >= IP_VERSION(4, 0, 5)) { WREG32_SOC15(VCN, 0, regUVD_IPX_DLDO_CONFIG, 2 << UVD_IPX_DLDO_CONFIG__ONO0_PWR_CONFIG__SHIFT); SOC15_WAIT_ON_RREG(VCN, 0, regUVD_IPX_DLDO_STATUS, @@ -273,6 +273,8 @@ static int umsch_mm_v4_0_set_hw_resources(struct amdgpu_umsch_mm *umsch) set_hw_resources.vmid_mask_mm_vcn = umsch->vmid_mask_mm_vcn; set_hw_resources.vmid_mask_mm_vpe = umsch->vmid_mask_mm_vpe; + set_hw_resources.collaboration_mask_vpe = + adev->vpe.collaborate_mode ? 0x3 : 0x0; set_hw_resources.engine_mask = umsch->engine_mask; set_hw_resources.vcn0_hqd_mask[0] = umsch->vcn0_hqd_mask; @@ -348,6 +350,7 @@ static int umsch_mm_v4_0_add_queue(struct amdgpu_umsch_mm *umsch, add_queue.h_queue = input_ptr->h_queue; add_queue.vm_context_cntl = input_ptr->vm_context_cntl; add_queue.is_context_suspended = input_ptr->is_context_suspended; + add_queue.collaboration_mode = adev->vpe.collaborate_mode ? 1 : 0; add_queue.api_status.api_completion_fence_addr = umsch->ring.fence_drv.gpu_addr; add_queue.api_status.api_completion_fence_value = ++umsch->ring.fence_drv.sync_seq; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index 25ba27151a..aaceecd558 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -304,7 +304,7 @@ static int vcn_v1_0_resume(void *handle) */ static void vcn_v1_0_mc_resume_spg_mode(struct amdgpu_device *adev) { - uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[0]->size + 4); uint32_t offset; /* cache window 0: fw */ @@ -371,7 +371,7 @@ static void vcn_v1_0_mc_resume_spg_mode(struct amdgpu_device *adev) static void vcn_v1_0_mc_resume_dpg_mode(struct amdgpu_device *adev) { - uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[0]->size + 4); uint32_t offset; /* cache window 0: fw */ diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index 18794394c5..e357d8cf0c 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -330,7 +330,7 @@ static int vcn_v2_0_resume(void *handle) */ static void vcn_v2_0_mc_resume(struct amdgpu_device *adev) { - uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[0]->size + 4); uint32_t offset; if (amdgpu_sriov_vf(adev)) @@ -386,7 +386,7 @@ static void vcn_v2_0_mc_resume(struct amdgpu_device *adev) static void vcn_v2_0_mc_resume_dpg_mode(struct amdgpu_device *adev, bool indirect) { - uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[0]->size + 4); uint32_t offset; /* cache window 0: fw */ @@ -1878,7 +1878,7 @@ static int vcn_v2_0_start_sriov(struct amdgpu_device *adev) init_table += header->vcn_table_offset; - size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[0]->size + 4); MMSCH_V2_0_INSERT_DIRECT_RD_MOD_WT( SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS), diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index aba403d718..1cd8a94b0f 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -414,13 +414,15 @@ static int vcn_v2_5_resume(void *handle) */ static void vcn_v2_5_mc_resume(struct amdgpu_device *adev) { - uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + uint32_t size; uint32_t offset; int i; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { if (adev->vcn.harvest_config & (1 << i)) continue; + + size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[i]->size + 4); /* cache window 0: fw */ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, @@ -469,7 +471,7 @@ static void vcn_v2_5_mc_resume(struct amdgpu_device *adev) static void vcn_v2_5_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) { - uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[inst_idx]->size + 4); uint32_t offset; /* cache window 0: fw */ @@ -1240,7 +1242,7 @@ static int vcn_v2_5_sriov_start(struct amdgpu_device *adev) SOC15_REG_OFFSET(VCN, i, mmUVD_STATUS), ~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY); - size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[i]->size + 4); /* mc resume*/ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { MMSCH_V1_0_INSERT_DIRECT_WT( diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index e02af4de52..8f82fb887e 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -449,7 +449,7 @@ static int vcn_v3_0_resume(void *handle) */ static void vcn_v3_0_mc_resume(struct amdgpu_device *adev, int inst) { - uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[inst]->size + 4); uint32_t offset; /* cache window 0: fw */ @@ -499,7 +499,7 @@ static void vcn_v3_0_mc_resume(struct amdgpu_device *adev, int inst) static void vcn_v3_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) { - uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[inst_idx]->size + 4); uint32_t offset; /* cache window 0: fw */ @@ -1332,7 +1332,7 @@ static int vcn_v3_0_start_sriov(struct amdgpu_device *adev) mmUVD_STATUS), ~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY); - cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[i]->size + 4); if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index 8ab01ae919..832d15f7b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -382,7 +382,7 @@ static void vcn_v4_0_mc_resume(struct amdgpu_device *adev, int inst) uint32_t offset, size; const struct common_firmware_header *hdr; - hdr = (const struct common_firmware_header *)adev->vcn.fw->data; + hdr = (const struct common_firmware_header *)adev->vcn.fw[inst]->data; size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); /* cache window 0: fw */ @@ -442,7 +442,7 @@ static void vcn_v4_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx { uint32_t offset, size; const struct common_firmware_header *hdr; - hdr = (const struct common_firmware_header *)adev->vcn.fw->data; + hdr = (const struct common_firmware_header *)adev->vcn.fw[inst_idx]->data; size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); /* cache window 0: fw */ @@ -1289,7 +1289,7 @@ static int vcn_v4_0_start_sriov(struct amdgpu_device *adev) regUVD_STATUS), ~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY); - cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[i]->size + 4); if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index 810bbfccd6..203fa98832 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -332,7 +332,7 @@ static void vcn_v4_0_3_mc_resume(struct amdgpu_device *adev, int inst_idx) uint32_t offset, size, vcn_inst; const struct common_firmware_header *hdr; - hdr = (const struct common_firmware_header *)adev->vcn.fw->data; + hdr = (const struct common_firmware_header *)adev->vcn.fw[inst_idx]->data; size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); vcn_inst = GET_INST(VCN, inst_idx); @@ -407,7 +407,7 @@ static void vcn_v4_0_3_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_i uint32_t offset, size; const struct common_firmware_header *hdr; - hdr = (const struct common_firmware_header *)adev->vcn.fw->data; + hdr = (const struct common_firmware_header *)adev->vcn.fw[inst_idx]->data; size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); /* cache window 0: fw */ @@ -894,7 +894,7 @@ static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev) MMSCH_V4_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCN, 0, regUVD_STATUS), ~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY); - cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[i]->size + 4); if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c index 49e4c3c09a..501e53e69f 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c @@ -45,7 +45,7 @@ #define mmUVD_DPG_LMA_DATA_BASE_IDX regUVD_DPG_LMA_DATA_BASE_IDX #define VCN_VID_SOC_ADDRESS_2_0 0x1fb00 -#define VCN1_VID_SOC_ADDRESS_3_0 0x48300 +#define VCN1_VID_SOC_ADDRESS_3_0 (0x48300 + 0x38000) #define VCN_HARVEST_MMSCH 0 @@ -329,7 +329,7 @@ static void vcn_v4_0_5_mc_resume(struct amdgpu_device *adev, int inst) uint32_t offset, size; const struct common_firmware_header *hdr; - hdr = (const struct common_firmware_header *)adev->vcn.fw->data; + hdr = (const struct common_firmware_header *)adev->vcn.fw[inst]->data; size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); /* cache window 0: fw */ @@ -390,7 +390,7 @@ static void vcn_v4_0_5_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_i uint32_t offset, size; const struct common_firmware_header *hdr; - hdr = (const struct common_firmware_header *)adev->vcn.fw->data; + hdr = (const struct common_firmware_header *)adev->vcn.fw[inst_idx]->data; size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); /* cache window 0: fw */ @@ -486,7 +486,8 @@ static void vcn_v4_0_5_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_i /* VCN global tiling registers */ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( - VCN, 0, regUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect); + VCN, inst_idx, regUVD_GFX10_ADDR_CONFIG), + adev->gfx.config.gb_addr_config, 0, indirect); } /** @@ -911,7 +912,6 @@ static int vcn_v4_0_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, b VCN, inst_idx, regUVD_MASTINT_EN), UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect); - if (indirect) amdgpu_vcn_psp_update_sram(adev, inst_idx, 0); @@ -1684,6 +1684,9 @@ static int vcn_v4_0_5_process_interrupt(struct amdgpu_device *adev, struct amdgp case SOC15_IH_CLIENTID_VCN: ip_instance = 0; break; + case SOC15_IH_CLIENTID_VCN1: + ip_instance = 1; + break; default: DRM_ERROR("Unhandled client id: %d\n", entry->client_id); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c new file mode 100644 index 0000000000..bc60c554eb --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c @@ -0,0 +1,1339 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include <linux/firmware.h> +#include "amdgpu.h" +#include "amdgpu_vcn.h" +#include "amdgpu_pm.h" +#include "soc15.h" +#include "soc15d.h" +#include "soc15_hw_ip.h" +#include "vcn_v2_0.h" + +#include "vcn/vcn_5_0_0_offset.h" +#include "vcn/vcn_5_0_0_sh_mask.h" +#include "ivsrcid/vcn/irqsrcs_vcn_4_0.h" +#include "vcn_v5_0_0.h" + +#include <drm/drm_drv.h> + +static int amdgpu_ih_clientid_vcns[] = { + SOC15_IH_CLIENTID_VCN, + SOC15_IH_CLIENTID_VCN1 +}; + +static void vcn_v5_0_0_set_unified_ring_funcs(struct amdgpu_device *adev); +static void vcn_v5_0_0_set_irq_funcs(struct amdgpu_device *adev); +static int vcn_v5_0_0_set_powergating_state(void *handle, + enum amd_powergating_state state); +static int vcn_v5_0_0_pause_dpg_mode(struct amdgpu_device *adev, + int inst_idx, struct dpg_pause_state *new_state); +static void vcn_v5_0_0_unified_ring_set_wptr(struct amdgpu_ring *ring); + +/** + * vcn_v5_0_0_early_init - set function pointers and load microcode + * + * @handle: amdgpu_device pointer + * + * Set ring and irq function pointers + * Load microcode from filesystem + */ +static int vcn_v5_0_0_early_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + /* re-use enc ring as unified ring */ + adev->vcn.num_enc_rings = 1; + + vcn_v5_0_0_set_unified_ring_funcs(adev); + vcn_v5_0_0_set_irq_funcs(adev); + + return amdgpu_vcn_early_init(adev); +} + +/** + * vcn_v5_0_0_sw_init - sw init for VCN block + * + * @handle: amdgpu_device pointer + * + * Load firmware and sw initialization + */ +static int vcn_v5_0_0_sw_init(void *handle) +{ + struct amdgpu_ring *ring; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, r; + + r = amdgpu_vcn_sw_init(adev); + if (r) + return r; + + amdgpu_vcn_setup_ucode(adev); + + r = amdgpu_vcn_resume(adev); + if (r) + return r; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + volatile struct amdgpu_vcn4_fw_shared *fw_shared; + + if (adev->vcn.harvest_config & (1 << i)) + continue; + + atomic_set(&adev->vcn.inst[i].sched_score, 0); + + /* VCN UNIFIED TRAP */ + r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i], + VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[i].irq); + if (r) + return r; + + /* VCN POISON TRAP */ + r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i], + VCN_4_0__SRCID_UVD_POISON, &adev->vcn.inst[i].irq); + if (r) + return r; + + ring = &adev->vcn.inst[i].ring_enc[0]; + ring->use_doorbell = true; + ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + 8 * i; + + ring->vm_hub = AMDGPU_MMHUB0(0); + sprintf(ring->name, "vcn_unified_%d", i); + + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0, + AMDGPU_RING_PRIO_0, &adev->vcn.inst[i].sched_score); + if (r) + return r; + + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE); + fw_shared->sq.is_enabled = 1; + + if (amdgpu_vcnfw_log) + amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]); + } + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) + adev->vcn.pause_dpg_mode = vcn_v5_0_0_pause_dpg_mode; + + return 0; +} + +/** + * vcn_v5_0_0_sw_fini - sw fini for VCN block + * + * @handle: amdgpu_device pointer + * + * VCN suspend and free up sw allocation + */ +static int vcn_v5_0_0_sw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, r, idx; + + if (drm_dev_enter(adev_to_drm(adev), &idx)) { + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + volatile struct amdgpu_vcn4_fw_shared *fw_shared; + + if (adev->vcn.harvest_config & (1 << i)) + continue; + + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + fw_shared->present_flag_0 = 0; + fw_shared->sq.is_enabled = 0; + } + + drm_dev_exit(idx); + } + + r = amdgpu_vcn_suspend(adev); + if (r) + return r; + + r = amdgpu_vcn_sw_fini(adev); + + return r; +} + +/** + * vcn_v5_0_0_hw_init - start and test VCN block + * + * @handle: amdgpu_device pointer + * + * Initialize the hardware, boot up the VCPU and do some testing + */ +static int vcn_v5_0_0_hw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_ring *ring; + int i, r; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + ring = &adev->vcn.inst[i].ring_enc[0]; + + adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, + ((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i), i); + + r = amdgpu_ring_test_helper(ring); + if (r) + goto done; + } + +done: + if (!r) + DRM_INFO("VCN decode and encode initialized successfully(under %s).\n", + (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)?"DPG Mode":"SPG Mode"); + + return r; +} + +/** + * vcn_v5_0_0_hw_fini - stop the hardware block + * + * @handle: amdgpu_device pointer + * + * Stop the VCN block, mark ring as not ready any more + */ +static int vcn_v5_0_0_hw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i; + + cancel_delayed_work_sync(&adev->vcn.idle_work); + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + amdgpu_irq_put(adev, &adev->vcn.inst[i].irq, 0); + } + + return 0; +} + +/** + * vcn_v5_0_0_suspend - suspend VCN block + * + * @handle: amdgpu_device pointer + * + * HW fini and suspend VCN block + */ +static int vcn_v5_0_0_suspend(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = vcn_v5_0_0_hw_fini(adev); + if (r) + return r; + + r = amdgpu_vcn_suspend(adev); + + return r; +} + +/** + * vcn_v5_0_0_resume - resume VCN block + * + * @handle: amdgpu_device pointer + * + * Resume firmware and hw init VCN block + */ +static int vcn_v5_0_0_resume(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = amdgpu_vcn_resume(adev); + if (r) + return r; + + r = vcn_v5_0_0_hw_init(adev); + + return r; +} + +/** + * vcn_v5_0_0_mc_resume - memory controller programming + * + * @adev: amdgpu_device pointer + * @inst: instance number + * + * Let the VCN memory controller know it's offsets + */ +static void vcn_v5_0_0_mc_resume(struct amdgpu_device *adev, int inst) +{ + uint32_t offset, size; + const struct common_firmware_header *hdr; + + hdr = (const struct common_firmware_header *)adev->vcn.fw[inst]->data; + size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); + + /* cache window 0: fw */ + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst].tmr_mc_addr_lo)); + WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst].tmr_mc_addr_hi)); + WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_OFFSET0, 0); + offset = 0; + } else { + WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[inst].gpu_addr)); + WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[inst].gpu_addr)); + offset = size; + WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_OFFSET0, AMDGPU_UVD_FIRMWARE_OFFSET >> 3); + } + WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_SIZE0, size); + + /* cache window 1: stack */ + WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[inst].gpu_addr + offset)); + WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset)); + WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_OFFSET1, 0); + WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE); + + /* cache window 2: context */ + WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); + WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); + WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_OFFSET2, 0); + WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE); + + /* non-cache window */ + WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr)); + WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr)); + WREG32_SOC15(VCN, inst, regUVD_VCPU_NONCACHE_OFFSET0, 0); + WREG32_SOC15(VCN, inst, regUVD_VCPU_NONCACHE_SIZE0, + AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared))); +} + +/** + * vcn_v5_0_0_mc_resume_dpg_mode - memory controller programming for dpg mode + * + * @adev: amdgpu_device pointer + * @inst_idx: instance number index + * @indirect: indirectly write sram + * + * Let the VCN memory controller know it's offsets with dpg mode + */ +static void vcn_v5_0_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) +{ + uint32_t offset, size; + const struct common_firmware_header *hdr; + + hdr = (const struct common_firmware_header *)adev->vcn.fw[inst_idx]->data; + size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); + + /* cache window 0: fw */ + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + if (!indirect) { + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_lo), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_hi), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect); + } else { + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect); + } + offset = 0; + } else { + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect); + offset = size; + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET0), + AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect); + } + + if (!indirect) + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_VCPU_CACHE_SIZE0), size, 0, indirect); + else + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_VCPU_CACHE_SIZE0), 0, 0, indirect); + + /* cache window 1: stack */ + if (!indirect) { + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); + } else { + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); + } + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect); + + /* cache window 2: context */ + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect); + + /* non-cache window */ + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_VCPU_NONCACHE_SIZE0), + AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)), 0, indirect); + + /* VCN global tiling registers */ + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect); + + return; +} + +/** + * vcn_v5_0_0_disable_static_power_gating - disable VCN static power gating + * + * @adev: amdgpu_device pointer + * @inst: instance number + * + * Disable static power gating for VCN block + */ +static void vcn_v5_0_0_disable_static_power_gating(struct amdgpu_device *adev, int inst) +{ + uint32_t data = 0; + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN) { + data = 1 << UVD_IPX_DLDO_CONFIG__ONO2_PWR_CONFIG__SHIFT; + WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, data); + SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS, 0, + UVD_IPX_DLDO_STATUS__ONO2_PWR_STATUS_MASK); + + data = 2 << UVD_IPX_DLDO_CONFIG__ONO3_PWR_CONFIG__SHIFT; + WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, data); + SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS, + 1 << UVD_IPX_DLDO_STATUS__ONO3_PWR_STATUS__SHIFT, + UVD_IPX_DLDO_STATUS__ONO3_PWR_STATUS_MASK); + + data = 2 << UVD_IPX_DLDO_CONFIG__ONO4_PWR_CONFIG__SHIFT; + WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, data); + SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS, + 1 << UVD_IPX_DLDO_STATUS__ONO4_PWR_STATUS__SHIFT, + UVD_IPX_DLDO_STATUS__ONO4_PWR_STATUS_MASK); + + data = 2 << UVD_IPX_DLDO_CONFIG__ONO5_PWR_CONFIG__SHIFT; + WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, data); + SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS, + 1 << UVD_IPX_DLDO_STATUS__ONO5_PWR_STATUS__SHIFT, + UVD_IPX_DLDO_STATUS__ONO5_PWR_STATUS_MASK); + } else { + data = 1 << UVD_IPX_DLDO_CONFIG__ONO2_PWR_CONFIG__SHIFT; + WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, data); + SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS, 0, + UVD_IPX_DLDO_STATUS__ONO2_PWR_STATUS_MASK); + + data = 1 << UVD_IPX_DLDO_CONFIG__ONO3_PWR_CONFIG__SHIFT; + WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, data); + SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS, 0, + UVD_IPX_DLDO_STATUS__ONO3_PWR_STATUS_MASK); + + data = 1 << UVD_IPX_DLDO_CONFIG__ONO4_PWR_CONFIG__SHIFT; + WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, data); + SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS, 0, + UVD_IPX_DLDO_STATUS__ONO4_PWR_STATUS_MASK); + + data = 1 << UVD_IPX_DLDO_CONFIG__ONO5_PWR_CONFIG__SHIFT; + WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, data); + SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS, 0, + UVD_IPX_DLDO_STATUS__ONO5_PWR_STATUS_MASK); + } + + data = RREG32_SOC15(VCN, inst, regUVD_POWER_STATUS); + data &= ~0x103; + if (adev->pg_flags & AMD_PG_SUPPORT_VCN) + data |= UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON | + UVD_POWER_STATUS__UVD_PG_EN_MASK; + + WREG32_SOC15(VCN, inst, regUVD_POWER_STATUS, data); + return; +} + +/** + * vcn_v5_0_0_enable_static_power_gating - enable VCN static power gating + * + * @adev: amdgpu_device pointer + * @inst: instance number + * + * Enable static power gating for VCN block + */ +static void vcn_v5_0_0_enable_static_power_gating(struct amdgpu_device *adev, int inst) +{ + uint32_t data; + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN) { + /* Before power off, this indicator has to be turned on */ + data = RREG32_SOC15(VCN, inst, regUVD_POWER_STATUS); + data &= ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK; + data |= UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF; + WREG32_SOC15(VCN, inst, regUVD_POWER_STATUS, data); + + data = 2 << UVD_IPX_DLDO_CONFIG__ONO5_PWR_CONFIG__SHIFT; + WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, data); + SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS, + 1 << UVD_IPX_DLDO_STATUS__ONO5_PWR_STATUS__SHIFT, + UVD_IPX_DLDO_STATUS__ONO5_PWR_STATUS_MASK); + + data = 2 << UVD_IPX_DLDO_CONFIG__ONO4_PWR_CONFIG__SHIFT; + WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, data); + SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS, + 1 << UVD_IPX_DLDO_STATUS__ONO4_PWR_STATUS__SHIFT, + UVD_IPX_DLDO_STATUS__ONO4_PWR_STATUS_MASK); + + data = 2 << UVD_IPX_DLDO_CONFIG__ONO3_PWR_CONFIG__SHIFT; + WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, data); + SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS, + 1 << UVD_IPX_DLDO_STATUS__ONO3_PWR_STATUS__SHIFT, + UVD_IPX_DLDO_STATUS__ONO3_PWR_STATUS_MASK); + + data = 2 << UVD_IPX_DLDO_CONFIG__ONO2_PWR_CONFIG__SHIFT; + WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, data); + SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS, + 1 << UVD_IPX_DLDO_STATUS__ONO2_PWR_STATUS__SHIFT, + UVD_IPX_DLDO_STATUS__ONO2_PWR_STATUS_MASK); + } + return; +} + +/** + * vcn_v5_0_0_disable_clock_gating - disable VCN clock gating + * + * @adev: amdgpu_device pointer + * @inst: instance number + * + * Disable clock gating for VCN block + */ +static void vcn_v5_0_0_disable_clock_gating(struct amdgpu_device *adev, int inst) +{ + return; +} + +#if 0 +/** + * vcn_v5_0_0_disable_clock_gating_dpg_mode - disable VCN clock gating dpg mode + * + * @adev: amdgpu_device pointer + * @sram_sel: sram select + * @inst_idx: instance number index + * @indirect: indirectly write sram + * + * Disable clock gating for VCN block with dpg mode + */ +static void vcn_v5_0_0_disable_clock_gating_dpg_mode(struct amdgpu_device *adev, uint8_t sram_sel, + int inst_idx, uint8_t indirect) +{ + return; +} +#endif + +/** + * vcn_v5_0_0_enable_clock_gating - enable VCN clock gating + * + * @adev: amdgpu_device pointer + * @inst: instance number + * + * Enable clock gating for VCN block + */ +static void vcn_v5_0_0_enable_clock_gating(struct amdgpu_device *adev, int inst) +{ + return; +} + +/** + * vcn_v5_0_0_start_dpg_mode - VCN start with dpg mode + * + * @adev: amdgpu_device pointer + * @inst_idx: instance number index + * @indirect: indirectly write sram + * + * Start VCN block with dpg mode + */ +static int vcn_v5_0_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) +{ + volatile struct amdgpu_vcn4_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; + struct amdgpu_ring *ring; + uint32_t tmp; + + /* disable register anti-hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, regUVD_POWER_STATUS), 1, + ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); + + /* enable dynamic power gating mode */ + tmp = RREG32_SOC15(VCN, inst_idx, regUVD_POWER_STATUS); + tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK; + tmp |= UVD_POWER_STATUS__UVD_PG_EN_MASK; + WREG32_SOC15(VCN, inst_idx, regUVD_POWER_STATUS, tmp); + + if (indirect) + adev->vcn.inst[inst_idx].dpg_sram_curr_addr = (uint32_t *)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr; + + /* enable VCPU clock */ + tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT); + tmp |= UVD_VCPU_CNTL__CLK_EN_MASK | UVD_VCPU_CNTL__BLK_RST_MASK; + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_VCPU_CNTL), tmp, 0, indirect); + + /* disable master interrupt */ + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_MASTINT_EN), 0, 0, indirect); + + /* setup regUVD_LMI_CTRL */ + tmp = (UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | + UVD_LMI_CTRL__REQ_MODE_MASK | + UVD_LMI_CTRL__CRC_RESET_MASK | + UVD_LMI_CTRL__MASK_MC_URGENT_MASK | + UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK | + (8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) | + 0x00100000L); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_LMI_CTRL), tmp, 0, indirect); + + vcn_v5_0_0_mc_resume_dpg_mode(adev, inst_idx, indirect); + + tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT); + tmp |= UVD_VCPU_CNTL__CLK_EN_MASK; + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_VCPU_CNTL), tmp, 0, indirect); + + /* enable LMI MC and UMC channels */ + tmp = 0x1f << UVD_LMI_CTRL2__RE_OFLD_MIF_WR_REQ_NUM__SHIFT; + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_LMI_CTRL2), tmp, 0, indirect); + + /* enable master interrupt */ + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_MASTINT_EN), + UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect); + + if (indirect) + amdgpu_vcn_psp_update_sram(adev, inst_idx, 0); + + ring = &adev->vcn.inst[inst_idx].ring_enc[0]; + + WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_LO, ring->gpu_addr); + WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(VCN, inst_idx, regUVD_RB_SIZE, ring->ring_size / 4); + + tmp = RREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE); + tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK); + WREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE, tmp); + fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET; + WREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR, 0); + WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR, 0); + + tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR); + WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR, tmp); + ring->wptr = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR); + + tmp = RREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE); + tmp |= VCN_RB_ENABLE__RB1_EN_MASK; + WREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE, tmp); + fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF); + + WREG32_SOC15(VCN, inst_idx, regVCN_RB1_DB_CTRL, + ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT | + VCN_RB1_DB_CTRL__EN_MASK); + + return 0; +} + +/** + * vcn_v5_0_0_start - VCN start + * + * @adev: amdgpu_device pointer + * + * Start VCN block + */ +static int vcn_v5_0_0_start(struct amdgpu_device *adev) +{ + volatile struct amdgpu_vcn4_fw_shared *fw_shared; + struct amdgpu_ring *ring; + uint32_t tmp; + int i, j, k, r; + + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_uvd(adev, true); + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { + r = vcn_v5_0_0_start_dpg_mode(adev, i, adev->vcn.indirect_sram); + continue; + } + + /* disable VCN power gating */ + vcn_v5_0_0_disable_static_power_gating(adev, i); + + /* set VCN status busy */ + tmp = RREG32_SOC15(VCN, i, regUVD_STATUS) | UVD_STATUS__UVD_BUSY; + WREG32_SOC15(VCN, i, regUVD_STATUS, tmp); + + /* enable VCPU clock */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), + UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK); + + /* disable master interrupt */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN), 0, + ~UVD_MASTINT_EN__VCPU_EN_MASK); + + /* enable LMI MC and UMC channels */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_LMI_CTRL2), 0, + ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); + + tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET); + tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK; + tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK; + WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp); + + /* setup regUVD_LMI_CTRL */ + tmp = RREG32_SOC15(VCN, i, regUVD_LMI_CTRL); + WREG32_SOC15(VCN, i, regUVD_LMI_CTRL, tmp | + UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | + UVD_LMI_CTRL__MASK_MC_URGENT_MASK | + UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK); + + vcn_v5_0_0_mc_resume(adev, i); + + /* VCN global tiling registers */ + WREG32_SOC15(VCN, i, regUVD_GFX10_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + + /* unblock VCPU register access */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_RB_ARB_CTRL), 0, + ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); + + /* release VCPU reset to boot */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + + for (j = 0; j < 10; ++j) { + uint32_t status; + + for (k = 0; k < 100; ++k) { + status = RREG32_SOC15(VCN, i, regUVD_STATUS); + if (status & 2) + break; + mdelay(10); + if (amdgpu_emu_mode == 1) + msleep(1); + } + + if (amdgpu_emu_mode == 1) { + r = -1; + if (status & 2) { + r = 0; + break; + } + } else { + r = 0; + if (status & 2) + break; + + dev_err(adev->dev, + "VCN[%d] is not responding, trying to reset the VCPU!!!\n", i); + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), + UVD_VCPU_CNTL__BLK_RST_MASK, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + mdelay(10); + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + + mdelay(10); + r = -1; + } + } + + if (r) { + dev_err(adev->dev, "VCN[%d] is not responding, giving up!!!\n", i); + return r; + } + + /* enable master interrupt */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN), + UVD_MASTINT_EN__VCPU_EN_MASK, + ~UVD_MASTINT_EN__VCPU_EN_MASK); + + /* clear the busy bit of VCN_STATUS */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_STATUS), 0, + ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT)); + + ring = &adev->vcn.inst[i].ring_enc[0]; + WREG32_SOC15(VCN, i, regVCN_RB1_DB_CTRL, + ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT | + VCN_RB1_DB_CTRL__EN_MASK); + + WREG32_SOC15(VCN, i, regUVD_RB_BASE_LO, ring->gpu_addr); + WREG32_SOC15(VCN, i, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(VCN, i, regUVD_RB_SIZE, ring->ring_size / 4); + + tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE); + tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK); + WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp); + fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET; + WREG32_SOC15(VCN, i, regUVD_RB_RPTR, 0); + WREG32_SOC15(VCN, i, regUVD_RB_WPTR, 0); + + tmp = RREG32_SOC15(VCN, i, regUVD_RB_RPTR); + WREG32_SOC15(VCN, i, regUVD_RB_WPTR, tmp); + ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_WPTR); + + tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE); + tmp |= VCN_RB_ENABLE__RB1_EN_MASK; + WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp); + fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF); + } + + return 0; +} + +/** + * vcn_v5_0_0_stop_dpg_mode - VCN stop with dpg mode + * + * @adev: amdgpu_device pointer + * @inst_idx: instance number index + * + * Stop VCN block with dpg mode + */ +static void vcn_v5_0_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) +{ + struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__UNPAUSE}; + uint32_t tmp; + + vcn_v5_0_0_pause_dpg_mode(adev, inst_idx, &state); + + /* Wait for power status to be 1 */ + SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS, 1, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); + + /* wait for read ptr to be equal to write ptr */ + tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR); + SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_RB_RPTR, tmp, 0xFFFFFFFF); + + /* disable dynamic power gating mode */ + WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, regUVD_POWER_STATUS), 0, + ~UVD_POWER_STATUS__UVD_PG_MODE_MASK); + + return; +} + +/** + * vcn_v5_0_0_stop - VCN stop + * + * @adev: amdgpu_device pointer + * + * Stop VCN block + */ +static int vcn_v5_0_0_stop(struct amdgpu_device *adev) +{ + volatile struct amdgpu_vcn4_fw_shared *fw_shared; + uint32_t tmp; + int i, r = 0; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF; + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { + vcn_v5_0_0_stop_dpg_mode(adev, i); + continue; + } + + /* wait for vcn idle */ + r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_STATUS, UVD_STATUS__IDLE, 0x7); + if (r) + return r; + + tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK | + UVD_LMI_STATUS__READ_CLEAN_MASK | + UVD_LMI_STATUS__WRITE_CLEAN_MASK | + UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK; + r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_LMI_STATUS, tmp, tmp); + if (r) + return r; + + /* disable LMI UMC channel */ + tmp = RREG32_SOC15(VCN, i, regUVD_LMI_CTRL2); + tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK; + WREG32_SOC15(VCN, i, regUVD_LMI_CTRL2, tmp); + tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK | + UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK; + r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_LMI_STATUS, tmp, tmp); + if (r) + return r; + + /* block VCPU register access */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_RB_ARB_CTRL), + UVD_RB_ARB_CTRL__VCPU_DIS_MASK, + ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); + + /* reset VCPU */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), + UVD_VCPU_CNTL__BLK_RST_MASK, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + + /* disable VCPU clock */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0, + ~(UVD_VCPU_CNTL__CLK_EN_MASK)); + + /* apply soft reset */ + tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET); + tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK; + WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp); + tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET); + tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK; + WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp); + + /* clear status */ + WREG32_SOC15(VCN, i, regUVD_STATUS, 0); + + /* enable VCN power gating */ + vcn_v5_0_0_enable_static_power_gating(adev, i); + } + + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_uvd(adev, false); + + return 0; +} + +/** + * vcn_v5_0_0_pause_dpg_mode - VCN pause with dpg mode + * + * @adev: amdgpu_device pointer + * @inst_idx: instance number index + * @new_state: pause state + * + * Pause dpg mode for VCN block + */ +static int vcn_v5_0_0_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx, + struct dpg_pause_state *new_state) +{ + uint32_t reg_data = 0; + int ret_code; + + /* pause/unpause if state is changed */ + if (adev->vcn.inst[inst_idx].pause_state.fw_based != new_state->fw_based) { + DRM_DEV_DEBUG(adev->dev, "dpg pause state changed %d -> %d", + adev->vcn.inst[inst_idx].pause_state.fw_based, new_state->fw_based); + reg_data = RREG32_SOC15(VCN, inst_idx, regUVD_DPG_PAUSE) & + (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK); + + if (new_state->fw_based == VCN_DPG_STATE__PAUSE) { + ret_code = SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS, 0x1, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); + + if (!ret_code) { + /* pause DPG */ + reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; + WREG32_SOC15(VCN, inst_idx, regUVD_DPG_PAUSE, reg_data); + + /* wait for ACK */ + SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_DPG_PAUSE, + UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, + UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK); + } + } else { + /* unpause dpg, no need to wait */ + reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; + WREG32_SOC15(VCN, inst_idx, regUVD_DPG_PAUSE, reg_data); + } + adev->vcn.inst[inst_idx].pause_state.fw_based = new_state->fw_based; + } + + return 0; +} + +/** + * vcn_v5_0_0_unified_ring_get_rptr - get unified read pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware unified read pointer + */ +static uint64_t vcn_v5_0_0_unified_ring_get_rptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring != &adev->vcn.inst[ring->me].ring_enc[0]) + DRM_ERROR("wrong ring id is identified in %s", __func__); + + return RREG32_SOC15(VCN, ring->me, regUVD_RB_RPTR); +} + +/** + * vcn_v5_0_0_unified_ring_get_wptr - get unified write pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware unified write pointer + */ +static uint64_t vcn_v5_0_0_unified_ring_get_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring != &adev->vcn.inst[ring->me].ring_enc[0]) + DRM_ERROR("wrong ring id is identified in %s", __func__); + + if (ring->use_doorbell) + return *ring->wptr_cpu_addr; + else + return RREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR); +} + +/** + * vcn_v5_0_0_unified_ring_set_wptr - set enc write pointer + * + * @ring: amdgpu_ring pointer + * + * Commits the enc write pointer to the hardware + */ +static void vcn_v5_0_0_unified_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring != &adev->vcn.inst[ring->me].ring_enc[0]) + DRM_ERROR("wrong ring id is identified in %s", __func__); + + if (ring->use_doorbell) { + *ring->wptr_cpu_addr = lower_32_bits(ring->wptr); + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); + } else { + WREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR, lower_32_bits(ring->wptr)); + } +} + +static const struct amdgpu_ring_funcs vcn_v5_0_0_unified_ring_vm_funcs = { + .type = AMDGPU_RING_TYPE_VCN_ENC, + .align_mask = 0x3f, + .nop = VCN_ENC_CMD_NO_OP, + .get_rptr = vcn_v5_0_0_unified_ring_get_rptr, + .get_wptr = vcn_v5_0_0_unified_ring_get_wptr, + .set_wptr = vcn_v5_0_0_unified_ring_set_wptr, + .emit_frame_size = + SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 + + 4 + /* vcn_v2_0_enc_ring_emit_vm_flush */ + 5 + 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */ + 1, /* vcn_v2_0_enc_ring_insert_end */ + .emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */ + .emit_ib = vcn_v2_0_enc_ring_emit_ib, + .emit_fence = vcn_v2_0_enc_ring_emit_fence, + .emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush, + .test_ring = amdgpu_vcn_enc_ring_test_ring, + .test_ib = amdgpu_vcn_unified_ring_test_ib, + .insert_nop = amdgpu_ring_insert_nop, + .insert_end = vcn_v2_0_enc_ring_insert_end, + .pad_ib = amdgpu_ring_generic_pad_ib, + .begin_use = amdgpu_vcn_ring_begin_use, + .end_use = amdgpu_vcn_ring_end_use, + .emit_wreg = vcn_v2_0_enc_ring_emit_wreg, + .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, +}; + +/** + * vcn_v5_0_0_set_unified_ring_funcs - set unified ring functions + * + * @adev: amdgpu_device pointer + * + * Set unified ring functions + */ +static void vcn_v5_0_0_set_unified_ring_funcs(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + adev->vcn.inst[i].ring_enc[0].funcs = &vcn_v5_0_0_unified_ring_vm_funcs; + adev->vcn.inst[i].ring_enc[0].me = i; + + DRM_INFO("VCN(%d) encode/decode are enabled in VM mode\n", i); + } +} + +/** + * vcn_v5_0_0_is_idle - check VCN block is idle + * + * @handle: amdgpu_device pointer + * + * Check whether VCN block is idle + */ +static bool vcn_v5_0_0_is_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, ret = 1; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + ret &= (RREG32_SOC15(VCN, i, regUVD_STATUS) == UVD_STATUS__IDLE); + } + + return ret; +} + +/** + * vcn_v5_0_0_wait_for_idle - wait for VCN block idle + * + * @handle: amdgpu_device pointer + * + * Wait for VCN block idle + */ +static int vcn_v5_0_0_wait_for_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, ret = 0; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + ret = SOC15_WAIT_ON_RREG(VCN, i, regUVD_STATUS, UVD_STATUS__IDLE, + UVD_STATUS__IDLE); + if (ret) + return ret; + } + + return ret; +} + +/** + * vcn_v5_0_0_set_clockgating_state - set VCN block clockgating state + * + * @handle: amdgpu_device pointer + * @state: clock gating state + * + * Set VCN block clockgating state + */ +static int vcn_v5_0_0_set_clockgating_state(void *handle, enum amd_clockgating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + bool enable = (state == AMD_CG_STATE_GATE) ? true : false; + int i; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + if (enable) { + if (RREG32_SOC15(VCN, i, regUVD_STATUS) != UVD_STATUS__IDLE) + return -EBUSY; + vcn_v5_0_0_enable_clock_gating(adev, i); + } else { + vcn_v5_0_0_disable_clock_gating(adev, i); + } + } + + return 0; +} + +/** + * vcn_v5_0_0_set_powergating_state - set VCN block powergating state + * + * @handle: amdgpu_device pointer + * @state: power gating state + * + * Set VCN block powergating state + */ +static int vcn_v5_0_0_set_powergating_state(void *handle, enum amd_powergating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int ret; + + if (state == adev->vcn.cur_state) + return 0; + + if (state == AMD_PG_STATE_GATE) + ret = vcn_v5_0_0_stop(adev); + else + ret = vcn_v5_0_0_start(adev); + + if (!ret) + adev->vcn.cur_state = state; + + return ret; +} + +/** + * vcn_v5_0_0_set_interrupt_state - set VCN block interrupt state + * + * @adev: amdgpu_device pointer + * @source: interrupt sources + * @type: interrupt types + * @state: interrupt states + * + * Set VCN block interrupt state + */ +static int vcn_v5_0_0_set_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, + unsigned type, enum amdgpu_interrupt_state state) +{ + return 0; +} + +/** + * vcn_v5_0_0_process_interrupt - process VCN block interrupt + * + * @adev: amdgpu_device pointer + * @source: interrupt sources + * @entry: interrupt entry from clients and sources + * + * Process VCN block interrupt + */ +static int vcn_v5_0_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + uint32_t ip_instance; + + switch (entry->client_id) { + case SOC15_IH_CLIENTID_VCN: + ip_instance = 0; + break; + case SOC15_IH_CLIENTID_VCN1: + ip_instance = 1; + break; + default: + DRM_ERROR("Unhandled client id: %d\n", entry->client_id); + return 0; + } + + DRM_DEBUG("IH: VCN TRAP\n"); + + switch (entry->src_id) { + case VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE: + amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[0]); + break; + case VCN_4_0__SRCID_UVD_POISON: + amdgpu_vcn_process_poison_irq(adev, source, entry); + break; + default: + DRM_ERROR("Unhandled interrupt: %d %d\n", + entry->src_id, entry->src_data[0]); + break; + } + + return 0; +} + +static const struct amdgpu_irq_src_funcs vcn_v5_0_0_irq_funcs = { + .set = vcn_v5_0_0_set_interrupt_state, + .process = vcn_v5_0_0_process_interrupt, +}; + +/** + * vcn_v5_0_0_set_irq_funcs - set VCN block interrupt irq functions + * + * @adev: amdgpu_device pointer + * + * Set VCN block interrupt irq functions + */ +static void vcn_v5_0_0_set_irq_funcs(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + adev->vcn.inst[i].irq.num_types = adev->vcn.num_enc_rings + 1; + adev->vcn.inst[i].irq.funcs = &vcn_v5_0_0_irq_funcs; + } +} + +static const struct amd_ip_funcs vcn_v5_0_0_ip_funcs = { + .name = "vcn_v5_0_0", + .early_init = vcn_v5_0_0_early_init, + .late_init = NULL, + .sw_init = vcn_v5_0_0_sw_init, + .sw_fini = vcn_v5_0_0_sw_fini, + .hw_init = vcn_v5_0_0_hw_init, + .hw_fini = vcn_v5_0_0_hw_fini, + .suspend = vcn_v5_0_0_suspend, + .resume = vcn_v5_0_0_resume, + .is_idle = vcn_v5_0_0_is_idle, + .wait_for_idle = vcn_v5_0_0_wait_for_idle, + .check_soft_reset = NULL, + .pre_soft_reset = NULL, + .soft_reset = NULL, + .post_soft_reset = NULL, + .set_clockgating_state = vcn_v5_0_0_set_clockgating_state, + .set_powergating_state = vcn_v5_0_0_set_powergating_state, +}; + +const struct amdgpu_ip_block_version vcn_v5_0_0_ip_block = { + .type = AMD_IP_BLOCK_TYPE_VCN, + .major = 5, + .minor = 0, + .rev = 0, + .funcs = &vcn_v5_0_0_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.h b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.h new file mode 100644 index 0000000000..51bbccd436 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.h @@ -0,0 +1,37 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __VCN_V5_0_0_H__ +#define __VCN_V5_0_0_H__ + +#define VCN_VID_SOC_ADDRESS 0x1FC00 +#define VCN_AON_SOC_ADDRESS 0x1F800 +#define VCN1_VID_SOC_ADDRESS 0x48300 +#define VCN1_AON_SOC_ADDRESS 0x48000 + +#define VCN_VID_IP_ADDRESS 0x0 +#define VCN_AON_IP_ADDRESS 0x30000 + +extern const struct amdgpu_ip_block_version vcn_v5_0_0_ip_block; + +#endif /* __VCN_V5_0_0_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c index db66e6ccca..b9e7858466 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c @@ -291,27 +291,29 @@ static int vega20_ih_irq_init(struct amdgpu_device *adev) adev->nbio.funcs->ih_control(adev); - if ((amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 2, 1)) && - adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { - ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN); - if (adev->irq.ih.use_bus_addr) { - ih_chicken = REG_SET_FIELD(ih_chicken, IH_CHICKEN, - MC_SPACE_GPA_ENABLE, 1); + if (!amdgpu_sriov_vf(adev)) { + if ((amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 2, 1)) && + adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { + ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN); + if (adev->irq.ih.use_bus_addr) { + ih_chicken = REG_SET_FIELD(ih_chicken, IH_CHICKEN, + MC_SPACE_GPA_ENABLE, 1); + } + WREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN, ih_chicken); } - WREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN, ih_chicken); - } - /* psp firmware won't program IH_CHICKEN for aldebaran - * driver needs to program it properly according to - * MC_SPACE type in IH_RB_CNTL */ - if ((amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 4, 0)) || - (amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 4, 2))) { - ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN_ALDEBARAN); - if (adev->irq.ih.use_bus_addr) { - ih_chicken = REG_SET_FIELD(ih_chicken, IH_CHICKEN, - MC_SPACE_GPA_ENABLE, 1); + /* psp firmware won't program IH_CHICKEN for aldebaran + * driver needs to program it properly according to + * MC_SPACE type in IH_RB_CNTL */ + if ((amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 4, 0)) || + (amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 4, 2))) { + ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN_ALDEBARAN); + if (adev->irq.ih.use_bus_addr) { + ih_chicken = REG_SET_FIELD(ih_chicken, IH_CHICKEN, + MC_SPACE_GPA_ENABLE, 1); + } + WREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN_ALDEBARAN, ih_chicken); } - WREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN_ALDEBARAN, ih_chicken); } for (i = 0; i < ARRAY_SIZE(ih); i++) { diff --git a/drivers/gpu/drm/amd/amdgpu/vpe_6_1_fw_if.h b/drivers/gpu/drm/amd/amdgpu/vpe_6_1_fw_if.h index 9b550deb48..47534dbbd1 100644 --- a/drivers/gpu/drm/amd/amdgpu/vpe_6_1_fw_if.h +++ b/drivers/gpu/drm/amd/amdgpu/vpe_6_1_fw_if.h @@ -40,7 +40,8 @@ enum VPE_CMD_OPCODE { VPE_CMD_OPCODE_POLL_REGMEM = 0x8, VPE_CMD_OPCODE_COND_EXE = 0x9, VPE_CMD_OPCODE_ATOMIC = 0xA, - VPE_CMD_OPCODE_PLANE_FILL = 0xB, + VPE_CMD_OPCODE_PRED_EXE = 0xB, + VPE_CMD_OPCODE_COLLAB_SYNC = 0xC, VPE_CMD_OPCODE_TIMESTAMP = 0xD }; diff --git a/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c b/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c index d20060a51e..09315dd5a1 100644 --- a/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c @@ -33,14 +33,38 @@ #include "vpe/vpe_6_1_0_sh_mask.h" MODULE_FIRMWARE("amdgpu/vpe_6_1_0.bin"); +MODULE_FIRMWARE("amdgpu/vpe_6_1_1.bin"); #define VPE_THREAD1_UCODE_OFFSET 0x8000 +#define regVPEC_COLLABORATE_CNTL 0x0013 +#define regVPEC_COLLABORATE_CNTL_BASE_IDX 0 +#define VPEC_COLLABORATE_CNTL__COLLABORATE_MODE_EN__SHIFT 0x0 +#define VPEC_COLLABORATE_CNTL__COLLABORATE_MODE_EN_MASK 0x00000001L + +#define regVPEC_COLLABORATE_CFG 0x0014 +#define regVPEC_COLLABORATE_CFG_BASE_IDX 0 +#define VPEC_COLLABORATE_CFG__MASTER_ID__SHIFT 0x0 +#define VPEC_COLLABORATE_CFG__MASTER_EN__SHIFT 0x3 +#define VPEC_COLLABORATE_CFG__SLAVE0_ID__SHIFT 0x4 +#define VPEC_COLLABORATE_CFG__SLAVE0_EN__SHIFT 0x7 +#define VPEC_COLLABORATE_CFG__MASTER_ID_MASK 0x00000007L +#define VPEC_COLLABORATE_CFG__MASTER_EN_MASK 0x00000008L +#define VPEC_COLLABORATE_CFG__SLAVE0_ID_MASK 0x00000070L +#define VPEC_COLLABORATE_CFG__SLAVE0_EN_MASK 0x00000080L + +#define regVPEC_CNTL_6_1_1 0x0016 +#define regVPEC_CNTL_6_1_1_BASE_IDX 0 +#define regVPEC_QUEUE_RESET_REQ_6_1_1 0x002c +#define regVPEC_QUEUE_RESET_REQ_6_1_1_BASE_IDX 0 +#define regVPEC_PUB_DUMMY2_6_1_1 0x004c +#define regVPEC_PUB_DUMMY2_6_1_1_BASE_IDX 0 + static uint32_t vpe_v6_1_get_reg_offset(struct amdgpu_vpe *vpe, uint32_t inst, uint32_t offset) { uint32_t base; - base = vpe->ring.adev->reg_offset[VPE_HWIP][0][0]; + base = vpe->ring.adev->reg_offset[VPE_HWIP][inst][0]; return base + offset; } @@ -48,12 +72,14 @@ static uint32_t vpe_v6_1_get_reg_offset(struct amdgpu_vpe *vpe, uint32_t inst, u static void vpe_v6_1_halt(struct amdgpu_vpe *vpe, bool halt) { struct amdgpu_device *adev = vpe->ring.adev; - uint32_t f32_cntl; + uint32_t i, f32_cntl; - f32_cntl = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_F32_CNTL)); - f32_cntl = REG_SET_FIELD(f32_cntl, VPEC_F32_CNTL, HALT, halt ? 1 : 0); - f32_cntl = REG_SET_FIELD(f32_cntl, VPEC_F32_CNTL, TH1_RESET, halt ? 1 : 0); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_F32_CNTL), f32_cntl); + for (i = 0; i < vpe->num_instances; i++) { + f32_cntl = RREG32(vpe_get_reg_offset(vpe, i, regVPEC_F32_CNTL)); + f32_cntl = REG_SET_FIELD(f32_cntl, VPEC_F32_CNTL, HALT, halt ? 1 : 0); + f32_cntl = REG_SET_FIELD(f32_cntl, VPEC_F32_CNTL, TH1_RESET, halt ? 1 : 0); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_F32_CNTL), f32_cntl); + } } static int vpe_v6_1_irq_init(struct amdgpu_vpe *vpe) @@ -70,20 +96,64 @@ static int vpe_v6_1_irq_init(struct amdgpu_vpe *vpe) return 0; } +static void vpe_v6_1_set_collaborate_mode(struct amdgpu_vpe *vpe, bool enable) +{ + struct amdgpu_device *adev = vpe->ring.adev; + uint32_t vpe_colla_cntl, vpe_colla_cfg, i; + + if (!vpe->collaborate_mode) + return; + + for (i = 0; i < vpe->num_instances; i++) { + vpe_colla_cntl = RREG32(vpe_get_reg_offset(vpe, i, regVPEC_COLLABORATE_CNTL)); + vpe_colla_cntl = REG_SET_FIELD(vpe_colla_cntl, VPEC_COLLABORATE_CNTL, + COLLABORATE_MODE_EN, enable ? 1 : 0); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_COLLABORATE_CNTL), vpe_colla_cntl); + + vpe_colla_cfg = RREG32(vpe_get_reg_offset(vpe, i, regVPEC_COLLABORATE_CFG)); + vpe_colla_cfg = REG_SET_FIELD(vpe_colla_cfg, VPEC_COLLABORATE_CFG, MASTER_ID, 0); + vpe_colla_cfg = REG_SET_FIELD(vpe_colla_cfg, VPEC_COLLABORATE_CFG, MASTER_EN, enable ? 1 : 0); + vpe_colla_cfg = REG_SET_FIELD(vpe_colla_cfg, VPEC_COLLABORATE_CFG, SLAVE0_ID, 1); + vpe_colla_cfg = REG_SET_FIELD(vpe_colla_cfg, VPEC_COLLABORATE_CFG, SLAVE0_EN, enable ? 1 : 0); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_COLLABORATE_CFG), vpe_colla_cfg); + } +} + static int vpe_v6_1_load_microcode(struct amdgpu_vpe *vpe) { struct amdgpu_device *adev = vpe->ring.adev; const struct vpe_firmware_header_v1_0 *vpe_hdr; const __le32 *data; uint32_t ucode_offset[2], ucode_size[2]; - uint32_t i, size_dw; + uint32_t i, j, size_dw; uint32_t ret; - // disable UMSCH_INT_ENABLE - ret = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_CNTL)); - ret = REG_SET_FIELD(ret, VPEC_CNTL, UMSCH_INT_ENABLE, 0); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_CNTL), ret); + /* disable UMSCH_INT_ENABLE */ + for (j = 0; j < vpe->num_instances; j++) { + + if (amdgpu_ip_version(adev, VPE_HWIP, 0) == IP_VERSION(6, 1, 1)) + ret = RREG32(vpe_get_reg_offset(vpe, j, regVPEC_CNTL_6_1_1)); + else + ret = RREG32(vpe_get_reg_offset(vpe, j, regVPEC_CNTL)); + + ret = REG_SET_FIELD(ret, VPEC_CNTL, UMSCH_INT_ENABLE, 0); + + if (amdgpu_ip_version(adev, VPE_HWIP, 0) == IP_VERSION(6, 1, 1)) + WREG32(vpe_get_reg_offset(vpe, j, regVPEC_CNTL_6_1_1), ret); + else + WREG32(vpe_get_reg_offset(vpe, j, regVPEC_CNTL), ret); + } + + /* setup collaborate mode */ + vpe_v6_1_set_collaborate_mode(vpe, true); + /* setup DPM */ + if (amdgpu_vpe_configure_dpm(vpe)) + dev_warn(adev->dev, "VPE failed to enable DPM\n"); + /* + * For VPE 6.1.1, still only need to add master's offset, and psp will apply it to slave as well. + * Here use instance 0 as master. + */ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { uint32_t f32_offset, f32_cntl; @@ -95,12 +165,7 @@ static int vpe_v6_1_load_microcode(struct amdgpu_vpe *vpe) adev->vpe.cmdbuf_cpu_addr[0] = f32_offset; adev->vpe.cmdbuf_cpu_addr[1] = f32_cntl; - amdgpu_vpe_psp_update_sram(adev); - - /* Config DPM */ - amdgpu_vpe_configure_dpm(vpe); - - return 0; + return amdgpu_vpe_psp_update_sram(adev); } vpe_hdr = (const struct vpe_firmware_header_v1_0 *)adev->vpe.fw->data; @@ -114,26 +179,25 @@ static int vpe_v6_1_load_microcode(struct amdgpu_vpe *vpe) vpe_v6_1_halt(vpe, true); - for (i = 0; i < 2; i++) { - if (i > 0) - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_UCODE_ADDR), VPE_THREAD1_UCODE_OFFSET); - else - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_UCODE_ADDR), 0); - - data = (const __le32 *)(adev->vpe.fw->data + ucode_offset[i]); - size_dw = ucode_size[i] / sizeof(__le32); - - while (size_dw--) { - if (amdgpu_emu_mode && size_dw % 500 == 0) - msleep(1); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_UCODE_DATA), le32_to_cpup(data++)); + for (j = 0; j < vpe->num_instances; j++) { + for (i = 0; i < 2; i++) { + if (i > 0) + WREG32(vpe_get_reg_offset(vpe, j, regVPEC_UCODE_ADDR), VPE_THREAD1_UCODE_OFFSET); + else + WREG32(vpe_get_reg_offset(vpe, j, regVPEC_UCODE_ADDR), 0); + + data = (const __le32 *)(adev->vpe.fw->data + ucode_offset[i]); + size_dw = ucode_size[i] / sizeof(__le32); + + while (size_dw--) { + if (amdgpu_emu_mode && size_dw % 500 == 0) + msleep(1); + WREG32(vpe_get_reg_offset(vpe, j, regVPEC_UCODE_DATA), le32_to_cpup(data++)); + } } - } vpe_v6_1_halt(vpe, false); - /* Config DPM */ - amdgpu_vpe_configure_dpm(vpe); return 0; } @@ -142,68 +206,68 @@ static int vpe_v6_1_ring_start(struct amdgpu_vpe *vpe) { struct amdgpu_ring *ring = &vpe->ring; struct amdgpu_device *adev = ring->adev; - uint32_t rb_bufsz, rb_cntl; - uint32_t ib_cntl; uint32_t doorbell, doorbell_offset; + uint32_t rb_bufsz, rb_cntl; + uint32_t ib_cntl, i; int ret; - rb_bufsz = order_base_2(ring->ring_size / 4); - rb_cntl = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_CNTL)); - rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RB_SIZE, rb_bufsz); - rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RB_PRIV, 1); - rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RB_VMID, 0); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_CNTL), rb_cntl); - - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_RPTR), 0); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_RPTR_HI), 0); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_WPTR), 0); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_WPTR_HI), 0); - - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_RPTR_ADDR_LO), - lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_RPTR_ADDR_HI), - upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF); - - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_BASE), ring->gpu_addr >> 8); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_BASE_HI), ring->gpu_addr >> 40); - - ring->wptr = 0; - - /* before programing wptr to a less value, need set minor_ptr_update first */ - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_MINOR_PTR_UPDATE), 1); - - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr) << 2); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2); - - /* set minor_ptr_update to 0 after wptr programed */ - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_MINOR_PTR_UPDATE), 0); - - doorbell = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_DOORBELL)); - doorbell_offset = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_DOORBELL_OFFSET)); - - doorbell = REG_SET_FIELD(doorbell, VPEC_QUEUE0_DOORBELL, ENABLE, ring->use_doorbell ? 1 : 0); - doorbell_offset = REG_SET_FIELD(doorbell_offset, VPEC_QUEUE0_DOORBELL_OFFSET, OFFSET, ring->doorbell_index); - - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_DOORBELL), doorbell); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_DOORBELL_OFFSET), doorbell_offset); - - adev->nbio.funcs->vpe_doorbell_range(adev, 0, ring->use_doorbell, ring->doorbell_index, 2); - - rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1); - rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RB_ENABLE, 1); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_CNTL), rb_cntl); - - ib_cntl = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_IB_CNTL)); - ib_cntl = REG_SET_FIELD(ib_cntl, VPEC_QUEUE0_IB_CNTL, IB_ENABLE, 1); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_IB_CNTL), ib_cntl); - - ring->sched.ready = true; + for (i = 0; i < vpe->num_instances; i++) { + /* Set ring buffer size in dwords */ + rb_bufsz = order_base_2(ring->ring_size / 4); + rb_cntl = RREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_CNTL)); + rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RB_SIZE, rb_bufsz); + rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RB_PRIV, 1); + rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RB_VMID, 0); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_CNTL), rb_cntl); + + /* Initialize the ring buffer's read and write pointers */ + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_RPTR), 0); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_RPTR_HI), 0); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_WPTR), 0); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_WPTR_HI), 0); + + /* set the wb address whether it's enabled or not */ + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_RPTR_ADDR_LO), + lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_RPTR_ADDR_HI), + upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF); + + rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1); + + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_BASE), ring->gpu_addr >> 8); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_BASE_HI), ring->gpu_addr >> 40); + + ring->wptr = 0; + + /* before programing wptr to a less value, need set minor_ptr_update first */ + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_MINOR_PTR_UPDATE), 1); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr) << 2); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2); + /* set minor_ptr_update to 0 after wptr programed */ + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_MINOR_PTR_UPDATE), 0); + + doorbell_offset = RREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_DOORBELL_OFFSET)); + doorbell_offset = REG_SET_FIELD(doorbell_offset, VPEC_QUEUE0_DOORBELL_OFFSET, OFFSET, ring->doorbell_index + i*4); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_DOORBELL_OFFSET), doorbell_offset); + + doorbell = RREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_DOORBELL)); + doorbell = REG_SET_FIELD(doorbell, VPEC_QUEUE0_DOORBELL, ENABLE, ring->use_doorbell ? 1 : 0); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_DOORBELL), doorbell); + + adev->nbio.funcs->vpe_doorbell_range(adev, i, ring->use_doorbell, ring->doorbell_index + i*4, 4); + + rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1); + rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RB_ENABLE, 1); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_CNTL), rb_cntl); + + ib_cntl = RREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_IB_CNTL)); + ib_cntl = REG_SET_FIELD(ib_cntl, VPEC_QUEUE0_IB_CNTL, IB_ENABLE, 1); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_IB_CNTL), ib_cntl); + } ret = amdgpu_ring_test_helper(ring); - if (ret) { - ring->sched.ready = false; + if (ret) return ret; - } return 0; } @@ -211,17 +275,30 @@ static int vpe_v6_1_ring_start(struct amdgpu_vpe *vpe) static int vpe_v_6_1_ring_stop(struct amdgpu_vpe *vpe) { struct amdgpu_device *adev = vpe->ring.adev; - uint32_t queue_reset; + uint32_t queue_reset, i; int ret; - queue_reset = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE_RESET_REQ)); - queue_reset = REG_SET_FIELD(queue_reset, VPEC_QUEUE_RESET_REQ, QUEUE0_RESET, 1); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE_RESET_REQ), queue_reset); + for (i = 0; i < vpe->num_instances; i++) { + if (amdgpu_ip_version(adev, VPE_HWIP, 0) == IP_VERSION(6, 1, 1)) + queue_reset = RREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE_RESET_REQ_6_1_1)); + else + queue_reset = RREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE_RESET_REQ)); + + queue_reset = REG_SET_FIELD(queue_reset, VPEC_QUEUE_RESET_REQ, QUEUE0_RESET, 1); + + if (amdgpu_ip_version(adev, VPE_HWIP, 0) == IP_VERSION(6, 1, 1)) { + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE_RESET_REQ_6_1_1), queue_reset); + ret = SOC15_WAIT_ON_RREG(VPE, i, regVPEC_QUEUE_RESET_REQ_6_1_1, 0, + VPEC_QUEUE_RESET_REQ__QUEUE0_RESET_MASK); + } else { + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE_RESET_REQ), queue_reset); + ret = SOC15_WAIT_ON_RREG(VPE, i, regVPEC_QUEUE_RESET_REQ, 0, + VPEC_QUEUE_RESET_REQ__QUEUE0_RESET_MASK); + } - ret = SOC15_WAIT_ON_RREG(VPE, 0, regVPEC_QUEUE_RESET_REQ, 0, - VPEC_QUEUE_RESET_REQ__QUEUE0_RESET_MASK); - if (ret) - dev_err(adev->dev, "VPE queue reset failed\n"); + if (ret) + dev_err(adev->dev, "VPE queue reset failed\n"); + } vpe->ring.sched.ready = false; @@ -236,10 +313,18 @@ static int vpe_v6_1_set_trap_irq_state(struct amdgpu_device *adev, struct amdgpu_vpe *vpe = &adev->vpe; uint32_t vpe_cntl; - vpe_cntl = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_CNTL)); + if (amdgpu_ip_version(adev, VPE_HWIP, 0) == IP_VERSION(6, 1, 1)) + vpe_cntl = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_CNTL_6_1_1)); + else + vpe_cntl = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_CNTL)); + vpe_cntl = REG_SET_FIELD(vpe_cntl, VPEC_CNTL, TRAP_ENABLE, state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_CNTL), vpe_cntl); + + if (amdgpu_ip_version(adev, VPE_HWIP, 0) == IP_VERSION(6, 1, 1)) + WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_CNTL_6_1_1), vpe_cntl); + else + WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_CNTL), vpe_cntl); return 0; } @@ -264,13 +349,19 @@ static int vpe_v6_1_process_trap_irq(struct amdgpu_device *adev, static int vpe_v6_1_set_regs(struct amdgpu_vpe *vpe) { + struct amdgpu_device *adev = container_of(vpe, struct amdgpu_device, vpe); + vpe->regs.queue0_rb_rptr_lo = regVPEC_QUEUE0_RB_RPTR; vpe->regs.queue0_rb_rptr_hi = regVPEC_QUEUE0_RB_RPTR_HI; vpe->regs.queue0_rb_wptr_lo = regVPEC_QUEUE0_RB_WPTR; vpe->regs.queue0_rb_wptr_hi = regVPEC_QUEUE0_RB_WPTR_HI; vpe->regs.queue0_preempt = regVPEC_QUEUE0_PREEMPT; - vpe->regs.dpm_enable = regVPEC_PUB_DUMMY2; + if (amdgpu_ip_version(adev, VPE_HWIP, 0) == IP_VERSION(6, 1, 1)) + vpe->regs.dpm_enable = regVPEC_PUB_DUMMY2_6_1_1; + else + vpe->regs.dpm_enable = regVPEC_PUB_DUMMY2; + vpe->regs.dpm_pratio = regVPEC_QUEUE6_DUMMY4; vpe->regs.dpm_request_interval = regVPEC_QUEUE5_DUMMY3; vpe->regs.dpm_decision_threshold = regVPEC_QUEUE5_DUMMY4; |