summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/display/dc/dml
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-06-19 21:00:30 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-06-19 21:00:30 +0000
commite54def4ad8144ab15f826416e2e0f290ef1901b4 (patch)
tree583f8d4bd95cd67c44ff37b878a7eddfca9ab97a /drivers/gpu/drm/amd/display/dc/dml
parentAdding upstream version 6.8.12. (diff)
downloadlinux-e54def4ad8144ab15f826416e2e0f290ef1901b4.tar.xz
linux-e54def4ad8144ab15f826416e2e0f290ef1901b4.zip
Adding upstream version 6.9.2.upstream/6.9.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'drivers/gpu/drm/amd/display/dc/dml')
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/Makefile3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c16
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn303/dcn303_fpu.c11
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c20
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c639
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.h19
7 files changed, 693 insertions, 17 deletions
diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile
index 59ade76ff..c4a5efd2d 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile
@@ -92,6 +92,7 @@ CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/display_rq_dlg_calc_32.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_util_32.o := $(dml_ccflags) $(frame_warn_flag)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn321/dcn321_fpu.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn35/dcn35_fpu.o := $(dml_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml/dcn351/dcn351_fpu.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/dcn31_fpu.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn301/dcn301_fpu.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn302/dcn302_fpu.o := $(dml_ccflags)
@@ -126,6 +127,7 @@ CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn30/dcn30_fpu.o := $(dml_rcflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/dcn32_fpu.o := $(dml_rcflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn321/dcn321_fpu.o := $(dml_rcflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn35/dcn35_fpu.o := $(dml_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn351/dcn351_fpu.o := $(dml_rcflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn31/dcn31_fpu.o := $(dml_rcflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn302/dcn302_fpu.o := $(dml_rcflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn303/dcn303_fpu.o := $(dml_rcflags)
@@ -157,6 +159,7 @@ DML += dcn302/dcn302_fpu.o
DML += dcn303/dcn303_fpu.o
DML += dcn314/dcn314_fpu.o
DML += dcn35/dcn35_fpu.o
+DML += dcn351/dcn351_fpu.o
DML += dsc/rc_calc_fpu.o
DML += calcs/dcn_calcs.o calcs/dcn_calc_math.o calcs/dcn_calc_auto.o
endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c
index 63c48c29b..e7f4a2d49 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c
@@ -4273,7 +4273,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
//Calculate Swath, DET Configuration, DCFCLKDeepSleep
//
- for (i = 0; i < mode_lib->soc.num_states; ++i) {
+ for (i = start_state; i < mode_lib->soc.num_states; ++i) {
for (j = 0; j <= 1; ++j) {
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
v->RequiredDPPCLKThisState[k] = v->RequiredDPPCLK[i][j][k];
@@ -4576,7 +4576,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
//Calculate Return BW
- for (i = 0; i < mode_lib->soc.num_states; ++i) {
+ for (i = start_state; i < mode_lib->soc.num_states; ++i) {
for (j = 0; j <= 1; ++j) {
for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
if (v->BlendingAndTiming[k] == k) {
@@ -4635,7 +4635,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
v->FinalDRAMClockChangeLatency = (v->DRAMClockChangeLatencyOverride > 0 ? v->DRAMClockChangeLatencyOverride : v->DRAMClockChangeLatency);
- for (i = 0; i < mode_lib->soc.num_states; ++i) {
+ for (i = start_state; i < mode_lib->soc.num_states; ++i) {
for (j = 0; j <= 1; ++j) {
v->DCFCLKState[i][j] = v->DCFCLKPerState[i];
}
@@ -4646,7 +4646,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
if (v->ClampMinDCFCLK) {
/* Clamp calculated values to actual minimum */
- for (i = 0; i < mode_lib->soc.num_states; ++i) {
+ for (i = start_state; i < mode_lib->soc.num_states; ++i) {
for (j = 0; j <= 1; ++j) {
if (v->DCFCLKState[i][j] < mode_lib->soc.min_dcfclk) {
v->DCFCLKState[i][j] = mode_lib->soc.min_dcfclk;
@@ -4656,7 +4656,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
}
}
- for (i = 0; i < mode_lib->soc.num_states; ++i) {
+ for (i = start_state; i < mode_lib->soc.num_states; ++i) {
for (j = 0; j <= 1; ++j) {
v->IdealSDPPortBandwidthPerState[i][j] = dml_min3(
v->ReturnBusWidth * v->DCFCLKState[i][j],
@@ -4674,7 +4674,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
//Re-ordering Buffer Support Check
- for (i = 0; i < mode_lib->soc.num_states; ++i) {
+ for (i = start_state; i < mode_lib->soc.num_states; ++i) {
for (j = 0; j <= 1; ++j) {
if ((v->ROBBufferSizeInKByte - v->PixelChunkSizeInKByte) * 1024 / v->ReturnBWPerState[i][j]
> (v->RoundTripPingLatencyCycles + 32) / v->DCFCLKState[i][j] + ReorderingBytes / v->ReturnBWPerState[i][j]) {
@@ -4692,7 +4692,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
}
- for (i = 0; i < mode_lib->soc.num_states; ++i) {
+ for (i = start_state; i < mode_lib->soc.num_states; ++i) {
for (j = 0; j <= 1; ++j) {
v->MaxTotalVerticalActiveAvailableBandwidth[i][j] = dml_min(
v->IdealSDPPortBandwidthPerState[i][j] * v->MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation / 100,
@@ -4708,7 +4708,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
//Prefetch Check
- for (i = 0; i < mode_lib->soc.num_states; ++i) {
+ for (i = start_state; i < mode_lib->soc.num_states; ++i) {
for (j = 0; j <= 1; ++j) {
int NextPrefetchModeState = MinPrefetchMode;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn303/dcn303_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn303/dcn303_fpu.c
index 3eb3a021a..3f02bb806 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn303/dcn303_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn303/dcn303_fpu.c
@@ -266,6 +266,17 @@ void dcn303_fpu_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_p
optimal_uclk_for_dcfclk_sta_targets[i] =
bw_params->clk_table.entries[j].memclk_mhz * 16;
break;
+ } else {
+ /* condition where (dcfclk_sta_targets[i] >= optimal_dcfclk_for_uclk[j]):
+ * This is required for dcn303 because it just so happens that the memory
+ * bandwidth is low enough such that all the optimal DCFCLK for each UCLK
+ * is lower than the smallest DCFCLK STA target. In this case we need to
+ * populate the optimal UCLK for each DCFCLK STA target to be the max UCLK.
+ */
+ if (j == num_uclk_states - 1) {
+ optimal_uclk_for_dcfclk_sta_targets[i] =
+ bw_params->clk_table.entries[j].memclk_mhz * 16;
+ }
}
}
}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
index ba76dd4a2..a0a65e099 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
@@ -2760,7 +2760,7 @@ static int build_synthetic_soc_states(bool disable_dc_mode_overwrite, struct clk
struct _vcs_dpi_voltage_scaling_st entry = {0};
struct clk_limit_table_entry max_clk_data = {0};
- unsigned int min_dcfclk_mhz = 399, min_fclk_mhz = 599;
+ unsigned int min_dcfclk_mhz = 199, min_fclk_mhz = 299;
static const unsigned int num_dcfclk_stas = 5;
unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {199, 615, 906, 1324, 1564};
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
index 7ea2bd537..53e40d3c4 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
@@ -166,8 +166,8 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = {
.num_states = 5,
.sr_exit_time_us = 28.0,
.sr_enter_plus_exit_time_us = 30.0,
- .sr_exit_z8_time_us = 210.0,
- .sr_enter_plus_exit_z8_time_us = 320.0,
+ .sr_exit_z8_time_us = 250.0,
+ .sr_enter_plus_exit_z8_time_us = 350.0,
.fclk_change_latency_us = 24.0,
.usr_retraining_latency_us = 2,
.writeback_latency_us = 12.0,
@@ -195,9 +195,9 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = {
.dcn_downspread_percent = 0.5,
.gpuvm_min_page_size_bytes = 4096,
.hostvm_min_page_size_bytes = 4096,
- .do_urgent_latency_adjustment = 0,
+ .do_urgent_latency_adjustment = 1,
.urgent_latency_adjustment_fabric_clock_component_us = 0,
- .urgent_latency_adjustment_fabric_clock_reference_mhz = 0,
+ .urgent_latency_adjustment_fabric_clock_reference_mhz = 3000,
};
void dcn35_build_wm_range_table_fpu(struct clk_mgr *clk_mgr)
@@ -583,12 +583,14 @@ void dcn35_decide_zstate_support(struct dc *dc, struct dc_state *context)
plane_count++;
}
- if (plane_count == 0) {
+ if (context->stream_count == 0 || plane_count == 0) {
support = DCN_ZSTATE_SUPPORT_ALLOW;
- } else if (plane_count == 1 && context->stream_count == 1 && context->streams[0]->signal == SIGNAL_TYPE_EDP) {
+ } else if (context->stream_count == 1 && context->streams[0]->signal == SIGNAL_TYPE_EDP) {
struct dc_link *link = context->streams[0]->sink->link;
bool is_pwrseq0 = link && link->link_index == 0;
- bool is_psr1 = link && link->psr_settings.psr_version == DC_PSR_VERSION_1 && !link->panel_config.psr.disable_psr;
+ bool is_psr = (link && (link->psr_settings.psr_version == DC_PSR_VERSION_1 ||
+ link->psr_settings.psr_version == DC_PSR_VERSION_SU_1) && !link->panel_config.psr.disable_psr);
+ bool is_replay = link && link->replay_settings.replay_feature_enabled;
int minmum_z8_residency =
dc->debug.minimum_z8_residency_time > 0 ? dc->debug.minimum_z8_residency_time : 1000;
bool allow_z8 = context->bw_ctx.dml.vba.StutterPeriod > (double)minmum_z8_residency;
@@ -596,12 +598,14 @@ void dcn35_decide_zstate_support(struct dc *dc, struct dc_state *context)
dc->debug.minimum_z10_residency_time > 0 ? dc->debug.minimum_z10_residency_time : 5000;
bool allow_z10 = context->bw_ctx.dml.vba.StutterPeriod > (double)minmum_z10_residency;
+ /*for psr1/psr-su, we allow z8 and z10 based on latency, for replay with IPS enabled, it will enter ips2*/
if (is_pwrseq0 && allow_z10)
support = DCN_ZSTATE_SUPPORT_ALLOW;
- else if (is_pwrseq0 && is_psr1)
+ else if (is_pwrseq0 && (is_psr || is_replay))
support = allow_z8 ? DCN_ZSTATE_SUPPORT_ALLOW_Z8_Z10_ONLY : DCN_ZSTATE_SUPPORT_ALLOW_Z10_ONLY;
else if (allow_z8)
support = DCN_ZSTATE_SUPPORT_ALLOW_Z8_ONLY;
+
}
context->bw_ctx.bw.dcn.clk.zstate_support = support;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c
new file mode 100644
index 000000000..b3ffab77c
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c
@@ -0,0 +1,639 @@
+/* SPDX-License-Identifier: MIT */
+/* Copyright 2024 Advanced Micro Devices, Inc. */
+#include "resource.h"
+#include "dcn351_fpu.h"
+#include "dcn31/dcn31_resource.h"
+#include "dcn32/dcn32_resource.h"
+#include "dcn35/dcn35_resource.h"
+#include "dcn351/dcn351_resource.h"
+#include "dml/dcn31/dcn31_fpu.h"
+#include "dml/dcn35/dcn35_fpu.h"
+#include "dml/dml_inline_defs.h"
+
+#include "link.h"
+
+#define DC_LOGGER_INIT(logger)
+
+struct _vcs_dpi_ip_params_st dcn3_51_ip = {
+ .VBlankNomDefaultUS = 668,
+ .gpuvm_enable = 1,
+ .gpuvm_max_page_table_levels = 1,
+ .hostvm_enable = 1,
+ .hostvm_max_page_table_levels = 2,
+ .rob_buffer_size_kbytes = 64,
+ .det_buffer_size_kbytes = 1536,
+ .config_return_buffer_size_in_kbytes = 1792,
+ .compressed_buffer_segment_size_in_kbytes = 64,
+ .meta_fifo_size_in_kentries = 32,
+ .zero_size_buffer_entries = 512,
+ .compbuf_reserved_space_64b = 256,
+ .compbuf_reserved_space_zs = 64,
+ .dpp_output_buffer_pixels = 2560,/*not used*/
+ .opp_output_buffer_lines = 1,/*not used*/
+ .pixel_chunk_size_kbytes = 8,
+ //.alpha_pixel_chunk_size_kbytes = 4;/*new*/
+ //.min_pixel_chunk_size_bytes = 1024;/*new*/
+ .meta_chunk_size_kbytes = 2,
+ .min_meta_chunk_size_bytes = 256,
+ .writeback_chunk_size_kbytes = 8,
+ .ptoi_supported = false,
+ .num_dsc = 4,
+ .maximum_dsc_bits_per_component = 12,/*delta from 10*/
+ .dsc422_native_support = true,/*delta from false*/
+ .is_line_buffer_bpp_fixed = true,/*new*/
+ .line_buffer_fixed_bpp = 32,/*delta from 48*/
+ .line_buffer_size_bits = 986880,/*delta from 789504*/
+ .max_line_buffer_lines = 32,/*delta from 12*/
+ .writeback_interface_buffer_size_kbytes = 90,
+ .max_num_dpp = 4,
+ .max_num_otg = 4,
+ .max_num_hdmi_frl_outputs = 1,
+ .max_num_wb = 1,
+ /*.max_num_hdmi_frl_outputs = 1; new in dml2*/
+ /*.max_num_dp2p0_outputs = 2; new in dml2*/
+ /*.max_num_dp2p0_streams = 4; new in dml2*/
+ .max_dchub_pscl_bw_pix_per_clk = 4,
+ .max_pscl_lb_bw_pix_per_clk = 2,
+ .max_lb_vscl_bw_pix_per_clk = 4,
+ .max_vscl_hscl_bw_pix_per_clk = 4,
+ .max_hscl_ratio = 6,
+ .max_vscl_ratio = 6,
+ .max_hscl_taps = 8,
+ .max_vscl_taps = 8,
+ .dpte_buffer_size_in_pte_reqs_luma = 68,/*changed from 64,*/
+ .dpte_buffer_size_in_pte_reqs_chroma = 36,/*changed from 34*/
+ /*.dcc_meta_buffer_size_bytes = 6272; new to dml2*/
+ .dispclk_ramp_margin_percent = 1.11,/*delta from 1*/
+ /*.dppclk_delay_subtotal = 47;
+ .dppclk_delay_scl = 50;
+ .dppclk_delay_scl_lb_only = 16;
+ .dppclk_delay_cnvc_formatter = 28;
+ .dppclk_delay_cnvc_cursor = 6;
+ .dispclk_delay_subtotal = 125;*/ /*new to dml2*/
+ .max_inter_dcn_tile_repeaters = 8,
+ .cursor_buffer_size = 16,
+ .cursor_chunk_size = 2,
+ .writeback_line_buffer_buffer_size = 0,
+ .writeback_min_hscl_ratio = 1,
+ .writeback_min_vscl_ratio = 1,
+ .writeback_max_hscl_ratio = 1,
+ .writeback_max_vscl_ratio = 1,
+ .writeback_max_hscl_taps = 1,
+ .writeback_max_vscl_taps = 1,
+ .dppclk_delay_subtotal = 47, /* changed from 46,*/
+ .dppclk_delay_scl = 50,
+ .dppclk_delay_scl_lb_only = 16,
+ .dppclk_delay_cnvc_formatter = 28,/*changed from 27,*/
+ .dppclk_delay_cnvc_cursor = 6,
+ .dispclk_delay_subtotal = 125, /*changed from 119,*/
+ .dynamic_metadata_vm_enabled = false,
+ .odm_combine_4to1_supported = false,
+ .dcc_supported = true,
+// .config_return_buffer_segment_size_in_kbytes = 64;/*required, hard coded in dml2_translate_ip_params*/
+
+};
+
+struct _vcs_dpi_soc_bounding_box_st dcn3_51_soc = {
+ /*TODO: correct dispclk/dppclk voltage level determination*/
+ .clock_limits = {
+ {
+ .state = 0,
+ .dcfclk_mhz = 400.0,
+ .fabricclk_mhz = 400.0,
+ .socclk_mhz = 600.0,
+ .dram_speed_mts = 3200.0,
+ .dispclk_mhz = 600.0,
+ .dppclk_mhz = 600.0,
+ .phyclk_mhz = 600.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 200.0,
+ .dtbclk_mhz = 600.0,
+ },
+ {
+ .state = 1,
+ .dcfclk_mhz = 600.0,
+ .fabricclk_mhz = 1000.0,
+ .socclk_mhz = 733.0,
+ .dram_speed_mts = 6400.0,
+ .dispclk_mhz = 800.0,
+ .dppclk_mhz = 800.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 266.7,
+ .dtbclk_mhz = 600.0,
+ },
+ {
+ .state = 2,
+ .dcfclk_mhz = 738.0,
+ .fabricclk_mhz = 1200.0,
+ .socclk_mhz = 880.0,
+ .dram_speed_mts = 7500.0,
+ .dispclk_mhz = 800.0,
+ .dppclk_mhz = 800.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 266.7,
+ .dtbclk_mhz = 600.0,
+ },
+ {
+ .state = 3,
+ .dcfclk_mhz = 800.0,
+ .fabricclk_mhz = 1400.0,
+ .socclk_mhz = 978.0,
+ .dram_speed_mts = 7500.0,
+ .dispclk_mhz = 960.0,
+ .dppclk_mhz = 960.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 320.0,
+ .dtbclk_mhz = 600.0,
+ },
+ {
+ .state = 4,
+ .dcfclk_mhz = 873.0,
+ .fabricclk_mhz = 1600.0,
+ .socclk_mhz = 1100.0,
+ .dram_speed_mts = 8533.0,
+ .dispclk_mhz = 1066.7,
+ .dppclk_mhz = 1066.7,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 355.6,
+ .dtbclk_mhz = 600.0,
+ },
+ {
+ .state = 5,
+ .dcfclk_mhz = 960.0,
+ .fabricclk_mhz = 1700.0,
+ .socclk_mhz = 1257.0,
+ .dram_speed_mts = 8533.0,
+ .dispclk_mhz = 1200.0,
+ .dppclk_mhz = 1200.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 400.0,
+ .dtbclk_mhz = 600.0,
+ },
+ {
+ .state = 6,
+ .dcfclk_mhz = 1067.0,
+ .fabricclk_mhz = 1850.0,
+ .socclk_mhz = 1257.0,
+ .dram_speed_mts = 8533.0,
+ .dispclk_mhz = 1371.4,
+ .dppclk_mhz = 1371.4,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 457.1,
+ .dtbclk_mhz = 600.0,
+ },
+ {
+ .state = 7,
+ .dcfclk_mhz = 1200.0,
+ .fabricclk_mhz = 2000.0,
+ .socclk_mhz = 1467.0,
+ .dram_speed_mts = 8533.0,
+ .dispclk_mhz = 1600.0,
+ .dppclk_mhz = 1600.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 533.3,
+ .dtbclk_mhz = 600.0,
+ },
+ },
+ .num_states = 8,
+ .sr_exit_time_us = 28.0,
+ .sr_enter_plus_exit_time_us = 30.0,
+ .sr_exit_z8_time_us = 250.0,
+ .sr_enter_plus_exit_z8_time_us = 350.0,
+ .fclk_change_latency_us = 24.0,
+ .usr_retraining_latency_us = 2,
+ .writeback_latency_us = 12.0,
+
+ .dram_channel_width_bytes = 4,/*not exist in dml2*/
+ .round_trip_ping_latency_dcfclk_cycles = 106,/*not exist in dml2*/
+ .urgent_latency_pixel_data_only_us = 4.0,
+ .urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
+ .urgent_latency_vm_data_only_us = 4.0,
+ .dram_clock_change_latency_us = 11.72,
+ .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
+
+ .pct_ideal_sdp_bw_after_urgent = 80.0,
+ .pct_ideal_fabric_bw_after_urgent = 80.0, /*new to dml2*/
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 65.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0,
+ .max_avg_sdp_bw_use_normal_percent = 60.0,
+ .max_avg_dram_bw_use_normal_percent = 60.0,
+ .fabric_datapath_to_dcn_data_return_bytes = 32,
+ .return_bus_width_bytes = 64,
+ .downspread_percent = 0.38,
+ .dcn_downspread_percent = 0.5,
+ .gpuvm_min_page_size_bytes = 4096,
+ .hostvm_min_page_size_bytes = 4096,
+ .do_urgent_latency_adjustment = 0,
+ .urgent_latency_adjustment_fabric_clock_component_us = 0,
+ .urgent_latency_adjustment_fabric_clock_reference_mhz = 0,
+ .num_chans = 4,
+ .dram_clock_change_latency_us = 11.72,
+ .dispclk_dppclk_vco_speed_mhz = 2400.0,
+};
+
+/*
+ * dcn351_update_bw_bounding_box
+ *
+ * This would override some dcn3_51 ip_or_soc initial parameters hardcoded from
+ * spreadsheet with actual values as per dGPU SKU:
+ * - with passed few options from dc->config
+ * - with dentist_vco_frequency from Clk Mgr (currently hardcoded, but might
+ * need to get it from PM FW)
+ * - with passed latency values (passed in ns units) in dc-> bb override for
+ * debugging purposes
+ * - with passed latencies from VBIOS (in 100_ns units) if available for
+ * certain dGPU SKU
+ * - with number of DRAM channels from VBIOS (which differ for certain dGPU SKU
+ * of the same ASIC)
+ * - clocks levels with passed clk_table entries from Clk Mgr as reported by PM
+ * FW for different clocks (which might differ for certain dGPU SKU of the
+ * same ASIC)
+ */
+void dcn351_update_bw_bounding_box_fpu(struct dc *dc,
+ struct clk_bw_params *bw_params)
+{
+ unsigned int i, closest_clk_lvl;
+ int j;
+ struct clk_limit_table *clk_table = &bw_params->clk_table;
+ struct _vcs_dpi_voltage_scaling_st *clock_limits =
+ dc->scratch.update_bw_bounding_box.clock_limits;
+ int max_dispclk_mhz = 0, max_dppclk_mhz = 0;
+
+ dc_assert_fp_enabled();
+
+ dcn3_51_ip.max_num_otg =
+ dc->res_pool->res_cap->num_timing_generator;
+ dcn3_51_ip.max_num_dpp = dc->res_pool->pipe_count;
+ dcn3_51_soc.num_chans = bw_params->num_channels;
+
+ ASSERT(clk_table->num_entries);
+
+ /* Prepass to find max clocks independent of voltage level. */
+ for (i = 0; i < clk_table->num_entries; ++i) {
+ if (clk_table->entries[i].dispclk_mhz > max_dispclk_mhz)
+ max_dispclk_mhz = clk_table->entries[i].dispclk_mhz;
+ if (clk_table->entries[i].dppclk_mhz > max_dppclk_mhz)
+ max_dppclk_mhz = clk_table->entries[i].dppclk_mhz;
+ }
+
+ for (i = 0; i < clk_table->num_entries; i++) {
+ /* loop backwards*/
+ for (closest_clk_lvl = 0, j = dcn3_51_soc.num_states - 1;
+ j >= 0; j--) {
+ if (dcn3_51_soc.clock_limits[j].dcfclk_mhz <=
+ clk_table->entries[i].dcfclk_mhz) {
+ closest_clk_lvl = j;
+ break;
+ }
+ }
+ if (clk_table->num_entries == 1) {
+ /*smu gives one DPM level, let's take the highest one*/
+ closest_clk_lvl = dcn3_51_soc.num_states - 1;
+ }
+
+ clock_limits[i].state = i;
+
+ /* Clocks dependent on voltage level. */
+ clock_limits[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz;
+ if (clk_table->num_entries == 1 &&
+ clock_limits[i].dcfclk_mhz <
+ dcn3_51_soc.clock_limits[closest_clk_lvl].dcfclk_mhz) {
+ /*SMU fix not released yet*/
+ clock_limits[i].dcfclk_mhz =
+ dcn3_51_soc.clock_limits[closest_clk_lvl].dcfclk_mhz;
+ }
+
+ clock_limits[i].fabricclk_mhz =
+ clk_table->entries[i].fclk_mhz;
+ clock_limits[i].socclk_mhz =
+ clk_table->entries[i].socclk_mhz;
+
+ if (clk_table->entries[i].memclk_mhz &&
+ clk_table->entries[i].wck_ratio)
+ clock_limits[i].dram_speed_mts =
+ clk_table->entries[i].memclk_mhz * 2 *
+ clk_table->entries[i].wck_ratio;
+
+ /* Clocks independent of voltage level. */
+ clock_limits[i].dispclk_mhz = max_dispclk_mhz ?
+ max_dispclk_mhz :
+ dcn3_51_soc.clock_limits[closest_clk_lvl].dispclk_mhz;
+
+ clock_limits[i].dppclk_mhz = max_dppclk_mhz ?
+ max_dppclk_mhz :
+ dcn3_51_soc.clock_limits[closest_clk_lvl].dppclk_mhz;
+
+ clock_limits[i].dram_bw_per_chan_gbps =
+ dcn3_51_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps;
+ clock_limits[i].dscclk_mhz =
+ dcn3_51_soc.clock_limits[closest_clk_lvl].dscclk_mhz;
+ clock_limits[i].dtbclk_mhz =
+ dcn3_51_soc.clock_limits[closest_clk_lvl].dtbclk_mhz;
+ clock_limits[i].phyclk_d18_mhz =
+ dcn3_51_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz;
+ clock_limits[i].phyclk_mhz =
+ dcn3_51_soc.clock_limits[closest_clk_lvl].phyclk_mhz;
+ }
+
+ memcpy(dcn3_51_soc.clock_limits, clock_limits,
+ sizeof(dcn3_51_soc.clock_limits));
+
+ if (clk_table->num_entries)
+ dcn3_51_soc.num_states = clk_table->num_entries;
+
+ if (max_dispclk_mhz) {
+ dcn3_51_soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2;
+ dc->dml.soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2;
+ }
+ if ((int)(dcn3_51_soc.dram_clock_change_latency_us * 1000)
+ != dc->debug.dram_clock_change_latency_ns
+ && dc->debug.dram_clock_change_latency_ns) {
+ dcn3_51_soc.dram_clock_change_latency_us =
+ dc->debug.dram_clock_change_latency_ns / 1000.0;
+ }
+
+ if (dc->bb_overrides.dram_clock_change_latency_ns > 0)
+ dcn3_51_soc.dram_clock_change_latency_us =
+ dc->bb_overrides.dram_clock_change_latency_ns / 1000.0;
+
+ if (dc->bb_overrides.sr_exit_time_ns > 0)
+ dcn3_51_soc.sr_exit_time_us = dc->bb_overrides.sr_exit_time_ns / 1000.0;
+
+ if (dc->bb_overrides.sr_enter_plus_exit_time_ns > 0)
+ dcn3_51_soc.sr_enter_plus_exit_time_us =
+ dc->bb_overrides.sr_enter_plus_exit_time_ns / 1000.0;
+
+ if (dc->bb_overrides.sr_exit_z8_time_ns > 0)
+ dcn3_51_soc.sr_exit_z8_time_us = dc->bb_overrides.sr_exit_z8_time_ns / 1000.0;
+
+ if (dc->bb_overrides.sr_enter_plus_exit_z8_time_ns > 0)
+ dcn3_51_soc.sr_enter_plus_exit_z8_time_us =
+ dc->bb_overrides.sr_enter_plus_exit_z8_time_ns / 1000.0;
+
+ /*temp till dml2 fully work without dml1*/
+ dml_init_instance(&dc->dml, &dcn3_51_soc, &dcn3_51_ip,
+ DML_PROJECT_DCN31);
+
+ /*copy to dml2, before dml2_create*/
+ if (clk_table->num_entries > 2) {
+
+ for (i = 0; i < clk_table->num_entries; i++) {
+ dc->dml2_options.bbox_overrides.clks_table.num_states =
+ clk_table->num_entries;
+ dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dcfclk_mhz =
+ clock_limits[i].dcfclk_mhz;
+ dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].fclk_mhz =
+ clock_limits[i].fabricclk_mhz;
+ dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dispclk_mhz =
+ clock_limits[i].dispclk_mhz;
+ dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dppclk_mhz =
+ clock_limits[i].dppclk_mhz;
+ dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].socclk_mhz =
+ clock_limits[i].socclk_mhz;
+ dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].memclk_mhz =
+ clk_table->entries[i].memclk_mhz * clk_table->entries[i].wck_ratio;
+ dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dtbclk_mhz =
+ clock_limits[i].dtbclk_mhz;
+ dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dcfclk_levels =
+ clk_table->num_entries;
+ dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_fclk_levels =
+ clk_table->num_entries;
+ dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dispclk_levels =
+ clk_table->num_entries;
+ dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dppclk_levels =
+ clk_table->num_entries;
+ dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_socclk_levels =
+ clk_table->num_entries;
+ dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_memclk_levels =
+ clk_table->num_entries;
+ dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dtbclk_levels =
+ clk_table->num_entries;
+ }
+ }
+
+ /* Update latency values */
+ dc->dml2_options.bbox_overrides.dram_clock_change_latency_us = dcn3_51_soc.dram_clock_change_latency_us;
+
+ dc->dml2_options.bbox_overrides.sr_exit_latency_us = dcn3_51_soc.sr_exit_time_us;
+ dc->dml2_options.bbox_overrides.sr_enter_plus_exit_latency_us = dcn3_51_soc.sr_enter_plus_exit_time_us;
+
+ dc->dml2_options.bbox_overrides.sr_exit_z8_time_us = dcn3_51_soc.sr_exit_z8_time_us;
+ dc->dml2_options.bbox_overrides.sr_enter_plus_exit_z8_time_us = dcn3_51_soc.sr_enter_plus_exit_z8_time_us;
+}
+
+static bool is_dual_plane(enum surface_pixel_format format)
+{
+ return format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN ||
+ format == SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA;
+}
+
+/*
+ * micro_sec_to_vert_lines () - converts time to number of vertical lines for a given timing
+ *
+ * @param: num_us: number of microseconds
+ * @return: number of vertical lines. If exact number of vertical lines is not found then
+ * it will round up to next number of lines to guarantee num_us
+ */
+static unsigned int micro_sec_to_vert_lines(unsigned int num_us, struct dc_crtc_timing *timing)
+{
+ unsigned int num_lines = 0;
+ unsigned int lines_time_in_ns = 1000.0 *
+ (((float)timing->h_total * 1000.0) /
+ ((float)timing->pix_clk_100hz / 10.0));
+
+ num_lines = dml_ceil(1000.0 * num_us / lines_time_in_ns, 1.0);
+
+ return num_lines;
+}
+
+static unsigned int get_vertical_back_porch(struct dc_crtc_timing *timing)
+{
+ unsigned int v_active = 0, v_blank = 0, v_back_porch = 0;
+
+ v_active = timing->v_border_top + timing->v_addressable + timing->v_border_bottom;
+ v_blank = timing->v_total - v_active;
+ v_back_porch = v_blank - timing->v_front_porch - timing->v_sync_width;
+
+ return v_back_porch;
+}
+
+int dcn351_populate_dml_pipes_from_context_fpu(struct dc *dc,
+ struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ bool fast_validate)
+{
+ int i, pipe_cnt;
+ struct resource_context *res_ctx = &context->res_ctx;
+ struct pipe_ctx *pipe;
+ bool upscaled = false;
+ const unsigned int max_allowed_vblank_nom = 1023;
+
+ dcn31_populate_dml_pipes_from_context(dc, context, pipes,
+ fast_validate);
+
+ for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
+ struct dc_crtc_timing *timing;
+ unsigned int num_lines = 0;
+ unsigned int v_back_porch = 0;
+
+ if (!res_ctx->pipe_ctx[i].stream)
+ continue;
+
+ pipe = &res_ctx->pipe_ctx[i];
+ timing = &pipe->stream->timing;
+
+ num_lines = micro_sec_to_vert_lines(dcn3_51_ip.VBlankNomDefaultUS, timing);
+ v_back_porch = get_vertical_back_porch(timing);
+
+ if (pipe->stream->adjust.v_total_max ==
+ pipe->stream->adjust.v_total_min &&
+ pipe->stream->adjust.v_total_min > timing->v_total) {
+ pipes[pipe_cnt].pipe.dest.vtotal =
+ pipe->stream->adjust.v_total_min;
+ pipes[pipe_cnt].pipe.dest.vblank_nom = timing->v_total -
+ pipes[pipe_cnt].pipe.dest.vactive;
+ }
+
+ pipes[pipe_cnt].pipe.dest.vblank_nom = timing->v_total - pipes[pipe_cnt].pipe.dest.vactive;
+ pipes[pipe_cnt].pipe.dest.vblank_nom = min(pipes[pipe_cnt].pipe.dest.vblank_nom, num_lines);
+ // vblank_nom should not smaller than (VSync (timing->v_sync_width + v_back_porch) + 2)
+ // + 2 is because
+ // 1 -> VStartup_start should be 1 line before VSync
+ // 1 -> always reserve 1 line between start of vblank to vstartup signal
+ pipes[pipe_cnt].pipe.dest.vblank_nom =
+ max(pipes[pipe_cnt].pipe.dest.vblank_nom, timing->v_sync_width + v_back_porch + 2);
+ pipes[pipe_cnt].pipe.dest.vblank_nom = min(pipes[pipe_cnt].pipe.dest.vblank_nom, max_allowed_vblank_nom);
+
+ if (pipe->plane_state &&
+ (pipe->plane_state->src_rect.height <
+ pipe->plane_state->dst_rect.height ||
+ pipe->plane_state->src_rect.width <
+ pipe->plane_state->dst_rect.width))
+ upscaled = true;
+
+ /*
+ * Immediate flip can be set dynamically after enabling the
+ * plane. We need to require support for immediate flip or
+ * underflow can be intermittently experienced depending on peak
+ * b/w requirements.
+ */
+ pipes[pipe_cnt].pipe.src.immediate_flip = true;
+
+ pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
+
+ DC_FP_START();
+ dcn31_zero_pipe_dcc_fraction(pipes, pipe_cnt);
+ DC_FP_END();
+
+ pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch;
+ pipes[pipe_cnt].pipe.src.dcc_rate = 3;
+ pipes[pipe_cnt].dout.dsc_input_bpc = 0;
+ pipes[pipe_cnt].pipe.src.gpuvm_min_page_size_kbytes = 256;
+
+ if (pipes[pipe_cnt].dout.dsc_enable) {
+ switch (timing->display_color_depth) {
+ case COLOR_DEPTH_888:
+ pipes[pipe_cnt].dout.dsc_input_bpc = 8;
+ break;
+ case COLOR_DEPTH_101010:
+ pipes[pipe_cnt].dout.dsc_input_bpc = 10;
+ break;
+ case COLOR_DEPTH_121212:
+ pipes[pipe_cnt].dout.dsc_input_bpc = 12;
+ break;
+ default:
+ ASSERT(0);
+ break;
+ }
+ }
+
+ pipe_cnt++;
+ }
+
+ context->bw_ctx.dml.ip.det_buffer_size_kbytes = 384;/*per guide*/
+ dc->config.enable_4to1MPC = false;
+
+ if (pipe_cnt == 1 && pipe->plane_state && !dc->debug.disable_z9_mpc) {
+ if (is_dual_plane(pipe->plane_state->format)
+ && pipe->plane_state->src_rect.width <= 1920 &&
+ pipe->plane_state->src_rect.height <= 1080) {
+ dc->config.enable_4to1MPC = true;
+ } else if (!is_dual_plane(pipe->plane_state->format) &&
+ pipe->plane_state->src_rect.width <= 5120) {
+ /*
+ * Limit to 5k max to avoid forced pipe split when there
+ * is not enough detile for swath
+ */
+ context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192;
+ pipes[0].pipe.src.unbounded_req_mode = true;
+ }
+ } else if (context->stream_count >=
+ dc->debug.crb_alloc_policy_min_disp_count &&
+ dc->debug.crb_alloc_policy > DET_SIZE_DEFAULT) {
+ context->bw_ctx.dml.ip.det_buffer_size_kbytes =
+ dc->debug.crb_alloc_policy * 64;
+ } else if (context->stream_count >= 3 && upscaled) {
+ context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192;
+ }
+
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+
+ if (!pipe->stream)
+ continue;
+
+ if (pipe->stream->signal == SIGNAL_TYPE_EDP &&
+ dc->debug.seamless_boot_odm_combine &&
+ pipe->stream->apply_seamless_boot_optimization) {
+
+ if (pipe->stream->apply_boot_odm_mode ==
+ dm_odm_combine_policy_2to1) {
+ context->bw_ctx.dml.vba.ODMCombinePolicy =
+ dm_odm_combine_policy_2to1;
+ break;
+ }
+ }
+ }
+
+ return pipe_cnt;
+}
+
+void dcn351_decide_zstate_support(struct dc *dc, struct dc_state *context)
+{
+ enum dcn_zstate_support_state support = DCN_ZSTATE_SUPPORT_DISALLOW;
+ unsigned int i, plane_count = 0;
+
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ if (context->res_ctx.pipe_ctx[i].plane_state)
+ plane_count++;
+ }
+
+ /*dcn351 does not support z9/z10*/
+ if (context->stream_count == 0 || plane_count == 0) {
+ support = DCN_ZSTATE_SUPPORT_ALLOW_Z8_ONLY;
+ } else if (context->stream_count == 1 && context->streams[0]->signal == SIGNAL_TYPE_EDP) {
+ struct dc_link *link = context->streams[0]->sink->link;
+ bool is_pwrseq0 = link && link->link_index == 0;
+ bool is_psr = (link && (link->psr_settings.psr_version == DC_PSR_VERSION_1 ||
+ link->psr_settings.psr_version == DC_PSR_VERSION_SU_1) && !link->panel_config.psr.disable_psr);
+ bool is_replay = link && link->replay_settings.replay_feature_enabled;
+ int minmum_z8_residency =
+ dc->debug.minimum_z8_residency_time > 0 ? dc->debug.minimum_z8_residency_time : 1000;
+ bool allow_z8 = context->bw_ctx.dml.vba.StutterPeriod > (double)minmum_z8_residency;
+
+ /*for psr1/psr-su, we allow z8 and z10 based on latency, for replay with IPS enabled, it will enter ips2*/
+ if (is_pwrseq0 && (is_psr || is_replay))
+ support = allow_z8 ? allow_z8 : DCN_ZSTATE_SUPPORT_DISALLOW;
+ }
+ context->bw_ctx.bw.dcn.clk.zstate_support = support;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.h
new file mode 100644
index 000000000..f93efab9a
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: MIT */
+/* Copyright 2024 Advanced Micro Devices, Inc. */
+
+#ifndef __DCN351_FPU_H__
+#define __DCN351_FPU_H__
+
+#include "clk_mgr.h"
+
+void dcn351_update_bw_bounding_box_fpu(struct dc *dc,
+ struct clk_bw_params *bw_params);
+
+int dcn351_populate_dml_pipes_from_context_fpu(struct dc *dc,
+ struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ bool fast_validate);
+
+void dcn351_decide_zstate_support(struct dc *dc, struct dc_state *context);
+
+#endif