diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:49:45 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:49:45 +0000 |
commit | 2c3c1048746a4622d8c89a29670120dc8fab93c4 (patch) | |
tree | 848558de17fb3008cdf4d861b01ac7781903ce39 /drivers/gpu/drm/amd/display/dc/dml/dcn31 | |
parent | Initial commit. (diff) | |
download | linux-2c3c1048746a4622d8c89a29670120dc8fab93c4.tar.xz linux-2c3c1048746a4622d8c89a29670120dc8fab93c4.zip |
Adding upstream version 6.1.76.upstream/6.1.76
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'drivers/gpu/drm/amd/display/dc/dml/dcn31')
6 files changed, 9810 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c new file mode 100644 index 000000000..19d034341 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c @@ -0,0 +1,840 @@ +/* + * Copyright 2019-2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#include "resource.h" +#include "clk_mgr.h" +#include "dcn31/dcn31_resource.h" +#include "dcn315/dcn315_resource.h" +#include "dcn316/dcn316_resource.h" + +#include "dml/dcn20/dcn20_fpu.h" +#include "dcn31_fpu.h" + +/** + * DOC: DCN31x FPU manipulation Overview + * + * The DCN architecture relies on FPU operations, which require special + * compilation flags and the use of kernel_fpu_begin/end functions; ideally, we + * want to avoid spreading FPU access across multiple files. With this idea in + * mind, this file aims to centralize all DCN3.1.x functions that require FPU + * access in a single place. Code in this file follows the following code + * pattern: + * + * 1. Functions that use FPU operations should be isolated in static functions. + * 2. The FPU functions should have the noinline attribute to ensure anything + * that deals with FP register is contained within this call. + * 3. All function that needs to be accessed outside this file requires a + * public interface that not uses any FPU reference. + * 4. Developers **must not** use DC_FP_START/END in this file, but they need + * to ensure that the caller invokes it before access any function available + * in this file. For this reason, public functions in this file must invoke + * dc_assert_fp_enabled(); + */ + +struct _vcs_dpi_ip_params_st dcn3_1_ip = { + .gpuvm_enable = 1, + .gpuvm_max_page_table_levels = 1, + .hostvm_enable = 1, + .hostvm_max_page_table_levels = 2, + .rob_buffer_size_kbytes = 64, + .det_buffer_size_kbytes = DCN3_1_DEFAULT_DET_SIZE, + .config_return_buffer_size_in_kbytes = 1792, + .compressed_buffer_segment_size_in_kbytes = 64, + .meta_fifo_size_in_kentries = 32, + .zero_size_buffer_entries = 512, + .compbuf_reserved_space_64b = 256, + .compbuf_reserved_space_zs = 64, + .dpp_output_buffer_pixels = 2560, + .opp_output_buffer_lines = 1, + .pixel_chunk_size_kbytes = 8, + .meta_chunk_size_kbytes = 2, + .min_meta_chunk_size_bytes = 256, + .writeback_chunk_size_kbytes = 8, + .ptoi_supported = false, + .num_dsc = 3, + .maximum_dsc_bits_per_component = 10, + .dsc422_native_support = false, + .is_line_buffer_bpp_fixed = true, + .line_buffer_fixed_bpp = 48, + .line_buffer_size_bits = 789504, + .max_line_buffer_lines = 12, + .writeback_interface_buffer_size_kbytes = 90, + .max_num_dpp = 4, + .max_num_otg = 4, + .max_num_hdmi_frl_outputs = 1, + .max_num_wb = 1, + .max_dchub_pscl_bw_pix_per_clk = 4, + .max_pscl_lb_bw_pix_per_clk = 2, + .max_lb_vscl_bw_pix_per_clk = 4, + .max_vscl_hscl_bw_pix_per_clk = 4, + .max_hscl_ratio = 6, + .max_vscl_ratio = 6, + .max_hscl_taps = 8, + .max_vscl_taps = 8, + .dpte_buffer_size_in_pte_reqs_luma = 64, + .dpte_buffer_size_in_pte_reqs_chroma = 34, + .dispclk_ramp_margin_percent = 1, + .max_inter_dcn_tile_repeaters = 8, + .cursor_buffer_size = 16, + .cursor_chunk_size = 2, + .writeback_line_buffer_buffer_size = 0, + .writeback_min_hscl_ratio = 1, + .writeback_min_vscl_ratio = 1, + .writeback_max_hscl_ratio = 1, + .writeback_max_vscl_ratio = 1, + .writeback_max_hscl_taps = 1, + .writeback_max_vscl_taps = 1, + .dppclk_delay_subtotal = 46, + .dppclk_delay_scl = 50, + .dppclk_delay_scl_lb_only = 16, + .dppclk_delay_cnvc_formatter = 27, + .dppclk_delay_cnvc_cursor = 6, + .dispclk_delay_subtotal = 119, + .dynamic_metadata_vm_enabled = false, + .odm_combine_4to1_supported = false, + .dcc_supported = true, +}; + +static struct _vcs_dpi_soc_bounding_box_st dcn3_1_soc = { + /*TODO: correct dispclk/dppclk voltage level determination*/ + .clock_limits = { + { + .state = 0, + .dispclk_mhz = 1200.0, + .dppclk_mhz = 1200.0, + .phyclk_mhz = 600.0, + .phyclk_d18_mhz = 667.0, + .dscclk_mhz = 186.0, + .dtbclk_mhz = 625.0, + }, + { + .state = 1, + .dispclk_mhz = 1200.0, + .dppclk_mhz = 1200.0, + .phyclk_mhz = 810.0, + .phyclk_d18_mhz = 667.0, + .dscclk_mhz = 209.0, + .dtbclk_mhz = 625.0, + }, + { + .state = 2, + .dispclk_mhz = 1200.0, + .dppclk_mhz = 1200.0, + .phyclk_mhz = 810.0, + .phyclk_d18_mhz = 667.0, + .dscclk_mhz = 209.0, + .dtbclk_mhz = 625.0, + }, + { + .state = 3, + .dispclk_mhz = 1200.0, + .dppclk_mhz = 1200.0, + .phyclk_mhz = 810.0, + .phyclk_d18_mhz = 667.0, + .dscclk_mhz = 371.0, + .dtbclk_mhz = 625.0, + }, + { + .state = 4, + .dispclk_mhz = 1200.0, + .dppclk_mhz = 1200.0, + .phyclk_mhz = 810.0, + .phyclk_d18_mhz = 667.0, + .dscclk_mhz = 417.0, + .dtbclk_mhz = 625.0, + }, + }, + .num_states = 5, + .sr_exit_time_us = 9.0, + .sr_enter_plus_exit_time_us = 11.0, + .sr_exit_z8_time_us = 442.0, + .sr_enter_plus_exit_z8_time_us = 560.0, + .writeback_latency_us = 12.0, + .dram_channel_width_bytes = 4, + .round_trip_ping_latency_dcfclk_cycles = 106, + .urgent_latency_pixel_data_only_us = 4.0, + .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, + .urgent_latency_vm_data_only_us = 4.0, + .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096, + .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096, + .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096, + .pct_ideal_sdp_bw_after_urgent = 80.0, + .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 65.0, + .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, + .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0, + .max_avg_sdp_bw_use_normal_percent = 60.0, + .max_avg_dram_bw_use_normal_percent = 60.0, + .fabric_datapath_to_dcn_data_return_bytes = 32, + .return_bus_width_bytes = 64, + .downspread_percent = 0.38, + .dcn_downspread_percent = 0.5, + .gpuvm_min_page_size_bytes = 4096, + .hostvm_min_page_size_bytes = 4096, + .do_urgent_latency_adjustment = false, + .urgent_latency_adjustment_fabric_clock_component_us = 0, + .urgent_latency_adjustment_fabric_clock_reference_mhz = 0, +}; + +struct _vcs_dpi_ip_params_st dcn3_15_ip = { + .gpuvm_enable = 1, + .gpuvm_max_page_table_levels = 1, + .hostvm_enable = 1, + .hostvm_max_page_table_levels = 2, + .rob_buffer_size_kbytes = 64, + .det_buffer_size_kbytes = DCN3_15_DEFAULT_DET_SIZE, + .min_comp_buffer_size_kbytes = 64, + .config_return_buffer_size_in_kbytes = 1024, + .compressed_buffer_segment_size_in_kbytes = 64, + .meta_fifo_size_in_kentries = 32, + .zero_size_buffer_entries = 512, + .compbuf_reserved_space_64b = 256, + .compbuf_reserved_space_zs = 64, + .dpp_output_buffer_pixels = 2560, + .opp_output_buffer_lines = 1, + .pixel_chunk_size_kbytes = 8, + .meta_chunk_size_kbytes = 2, + .min_meta_chunk_size_bytes = 256, + .writeback_chunk_size_kbytes = 8, + .ptoi_supported = false, + .num_dsc = 3, + .maximum_dsc_bits_per_component = 10, + .dsc422_native_support = false, + .is_line_buffer_bpp_fixed = true, + .line_buffer_fixed_bpp = 48, + .line_buffer_size_bits = 789504, + .max_line_buffer_lines = 12, + .writeback_interface_buffer_size_kbytes = 90, + .max_num_dpp = 4, + .max_num_otg = 4, + .max_num_hdmi_frl_outputs = 1, + .max_num_wb = 1, + .max_dchub_pscl_bw_pix_per_clk = 4, + .max_pscl_lb_bw_pix_per_clk = 2, + .max_lb_vscl_bw_pix_per_clk = 4, + .max_vscl_hscl_bw_pix_per_clk = 4, + .max_hscl_ratio = 6, + .max_vscl_ratio = 6, + .max_hscl_taps = 8, + .max_vscl_taps = 8, + .dpte_buffer_size_in_pte_reqs_luma = 64, + .dpte_buffer_size_in_pte_reqs_chroma = 34, + .dispclk_ramp_margin_percent = 1, + .max_inter_dcn_tile_repeaters = 9, + .cursor_buffer_size = 16, + .cursor_chunk_size = 2, + .writeback_line_buffer_buffer_size = 0, + .writeback_min_hscl_ratio = 1, + .writeback_min_vscl_ratio = 1, + .writeback_max_hscl_ratio = 1, + .writeback_max_vscl_ratio = 1, + .writeback_max_hscl_taps = 1, + .writeback_max_vscl_taps = 1, + .dppclk_delay_subtotal = 46, + .dppclk_delay_scl = 50, + .dppclk_delay_scl_lb_only = 16, + .dppclk_delay_cnvc_formatter = 27, + .dppclk_delay_cnvc_cursor = 6, + .dispclk_delay_subtotal = 119, + .dynamic_metadata_vm_enabled = false, + .odm_combine_4to1_supported = false, + .dcc_supported = true, +}; + +static struct _vcs_dpi_soc_bounding_box_st dcn3_15_soc = { + .sr_exit_time_us = 9.0, + .sr_enter_plus_exit_time_us = 11.0, + .sr_exit_z8_time_us = 50.0, + .sr_enter_plus_exit_z8_time_us = 50.0, + .writeback_latency_us = 12.0, + .dram_channel_width_bytes = 4, + .round_trip_ping_latency_dcfclk_cycles = 106, + .urgent_latency_pixel_data_only_us = 4.0, + .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, + .urgent_latency_vm_data_only_us = 4.0, + .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096, + .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096, + .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096, + .pct_ideal_sdp_bw_after_urgent = 80.0, + .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 65.0, + .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, + .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0, + .max_avg_sdp_bw_use_normal_percent = 60.0, + .max_avg_dram_bw_use_normal_percent = 60.0, + .fabric_datapath_to_dcn_data_return_bytes = 32, + .return_bus_width_bytes = 64, + .downspread_percent = 0.38, + .dcn_downspread_percent = 0.38, + .gpuvm_min_page_size_bytes = 4096, + .hostvm_min_page_size_bytes = 4096, + .do_urgent_latency_adjustment = false, + .urgent_latency_adjustment_fabric_clock_component_us = 0, + .urgent_latency_adjustment_fabric_clock_reference_mhz = 0, + .num_chans = 4, + .dummy_pstate_latency_us = 10.0 +}; + +struct _vcs_dpi_ip_params_st dcn3_16_ip = { + .gpuvm_enable = 1, + .gpuvm_max_page_table_levels = 1, + .hostvm_enable = 1, + .hostvm_max_page_table_levels = 2, + .rob_buffer_size_kbytes = 64, + .det_buffer_size_kbytes = DCN3_16_DEFAULT_DET_SIZE, + .min_comp_buffer_size_kbytes = 64, + .config_return_buffer_size_in_kbytes = 1024, + .compressed_buffer_segment_size_in_kbytes = 64, + .meta_fifo_size_in_kentries = 32, + .zero_size_buffer_entries = 512, + .compbuf_reserved_space_64b = 256, + .compbuf_reserved_space_zs = 64, + .dpp_output_buffer_pixels = 2560, + .opp_output_buffer_lines = 1, + .pixel_chunk_size_kbytes = 8, + .meta_chunk_size_kbytes = 2, + .min_meta_chunk_size_bytes = 256, + .writeback_chunk_size_kbytes = 8, + .ptoi_supported = false, + .num_dsc = 3, + .maximum_dsc_bits_per_component = 10, + .dsc422_native_support = false, + .is_line_buffer_bpp_fixed = true, + .line_buffer_fixed_bpp = 48, + .line_buffer_size_bits = 789504, + .max_line_buffer_lines = 12, + .writeback_interface_buffer_size_kbytes = 90, + .max_num_dpp = 4, + .max_num_otg = 4, + .max_num_hdmi_frl_outputs = 1, + .max_num_wb = 1, + .max_dchub_pscl_bw_pix_per_clk = 4, + .max_pscl_lb_bw_pix_per_clk = 2, + .max_lb_vscl_bw_pix_per_clk = 4, + .max_vscl_hscl_bw_pix_per_clk = 4, + .max_hscl_ratio = 6, + .max_vscl_ratio = 6, + .max_hscl_taps = 8, + .max_vscl_taps = 8, + .dpte_buffer_size_in_pte_reqs_luma = 64, + .dpte_buffer_size_in_pte_reqs_chroma = 34, + .dispclk_ramp_margin_percent = 1, + .max_inter_dcn_tile_repeaters = 8, + .cursor_buffer_size = 16, + .cursor_chunk_size = 2, + .writeback_line_buffer_buffer_size = 0, + .writeback_min_hscl_ratio = 1, + .writeback_min_vscl_ratio = 1, + .writeback_max_hscl_ratio = 1, + .writeback_max_vscl_ratio = 1, + .writeback_max_hscl_taps = 1, + .writeback_max_vscl_taps = 1, + .dppclk_delay_subtotal = 46, + .dppclk_delay_scl = 50, + .dppclk_delay_scl_lb_only = 16, + .dppclk_delay_cnvc_formatter = 27, + .dppclk_delay_cnvc_cursor = 6, + .dispclk_delay_subtotal = 119, + .dynamic_metadata_vm_enabled = false, + .odm_combine_4to1_supported = false, + .dcc_supported = true, +}; + +static struct _vcs_dpi_soc_bounding_box_st dcn3_16_soc = { + /*TODO: correct dispclk/dppclk voltage level determination*/ + .clock_limits = { + { + .state = 0, + .dispclk_mhz = 556.0, + .dppclk_mhz = 556.0, + .phyclk_mhz = 600.0, + .phyclk_d18_mhz = 445.0, + .dscclk_mhz = 186.0, + .dtbclk_mhz = 625.0, + }, + { + .state = 1, + .dispclk_mhz = 625.0, + .dppclk_mhz = 625.0, + .phyclk_mhz = 810.0, + .phyclk_d18_mhz = 667.0, + .dscclk_mhz = 209.0, + .dtbclk_mhz = 625.0, + }, + { + .state = 2, + .dispclk_mhz = 625.0, + .dppclk_mhz = 625.0, + .phyclk_mhz = 810.0, + .phyclk_d18_mhz = 667.0, + .dscclk_mhz = 209.0, + .dtbclk_mhz = 625.0, + }, + { + .state = 3, + .dispclk_mhz = 1112.0, + .dppclk_mhz = 1112.0, + .phyclk_mhz = 810.0, + .phyclk_d18_mhz = 667.0, + .dscclk_mhz = 371.0, + .dtbclk_mhz = 625.0, + }, + { + .state = 4, + .dispclk_mhz = 1250.0, + .dppclk_mhz = 1250.0, + .phyclk_mhz = 810.0, + .phyclk_d18_mhz = 667.0, + .dscclk_mhz = 417.0, + .dtbclk_mhz = 625.0, + }, + }, + .num_states = 5, + .sr_exit_time_us = 9.0, + .sr_enter_plus_exit_time_us = 11.0, + .sr_exit_z8_time_us = 442.0, + .sr_enter_plus_exit_z8_time_us = 560.0, + .writeback_latency_us = 12.0, + .dram_channel_width_bytes = 4, + .round_trip_ping_latency_dcfclk_cycles = 106, + .urgent_latency_pixel_data_only_us = 4.0, + .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, + .urgent_latency_vm_data_only_us = 4.0, + .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096, + .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096, + .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096, + .pct_ideal_sdp_bw_after_urgent = 80.0, + .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 65.0, + .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, + .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0, + .max_avg_sdp_bw_use_normal_percent = 60.0, + .max_avg_dram_bw_use_normal_percent = 60.0, + .fabric_datapath_to_dcn_data_return_bytes = 32, + .return_bus_width_bytes = 64, + .downspread_percent = 0.38, + .dcn_downspread_percent = 0.5, + .gpuvm_min_page_size_bytes = 4096, + .hostvm_min_page_size_bytes = 4096, + .do_urgent_latency_adjustment = false, + .urgent_latency_adjustment_fabric_clock_component_us = 0, + .urgent_latency_adjustment_fabric_clock_reference_mhz = 0, +}; + +void dcn31_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes, + int pipe_cnt) +{ + dc_assert_fp_enabled(); + + pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0; + pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0; +} + +void dcn31_update_soc_for_wm_a(struct dc *dc, struct dc_state *context) +{ + dc_assert_fp_enabled(); + + if (dc->clk_mgr->bw_params->wm_table.entries[WM_A].valid) { + context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.entries[WM_A].pstate_latency_us; + context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.entries[WM_A].sr_enter_plus_exit_time_us; + context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.entries[WM_A].sr_exit_time_us; + } +} + +void dcn315_update_soc_for_wm_a(struct dc *dc, struct dc_state *context) +{ + dc_assert_fp_enabled(); + + if (dc->clk_mgr->bw_params->wm_table.entries[WM_A].valid) { + /* For 315 pstate change is only supported if possible in vactive */ + if (context->bw_ctx.dml.vba.DRAMClockChangeSupport[context->bw_ctx.dml.vba.VoltageLevel][context->bw_ctx.dml.vba.maxMpcComb] != dm_dram_clock_change_vactive) + context->bw_ctx.dml.soc.dram_clock_change_latency_us = context->bw_ctx.dml.soc.dummy_pstate_latency_us; + else + context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.entries[WM_A].pstate_latency_us; + context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = + dc->clk_mgr->bw_params->wm_table.entries[WM_A].sr_enter_plus_exit_time_us; + context->bw_ctx.dml.soc.sr_exit_time_us = + dc->clk_mgr->bw_params->wm_table.entries[WM_A].sr_exit_time_us; + } +} + +void dcn31_calculate_wm_and_dlg_fp( + struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt, + int vlevel) +{ + int i, pipe_idx, total_det = 0, active_hubp_count = 0; + double dcfclk = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb]; + + dc_assert_fp_enabled(); + + if (context->bw_ctx.dml.soc.min_dcfclk > dcfclk) + dcfclk = context->bw_ctx.dml.soc.min_dcfclk; + + /* We don't recalculate clocks for 0 pipe configs, which can block + * S0i3 as high clocks will block low power states + * Override any clocks that can block S0i3 to min here + */ + if (pipe_cnt == 0) { + context->bw_ctx.bw.dcn.clk.dcfclk_khz = dcfclk; // always should be vlevel 0 + return; + } + + pipes[0].clks_cfg.voltage = vlevel; + pipes[0].clks_cfg.dcfclk_mhz = dcfclk; + pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].socclk_mhz; + + /* Set A: + * All clocks min required + * + * Set A calculated last so that following calculations are based on Set A + */ + dc->res_pool->funcs->update_soc_for_wm_a(dc, context); + context->bw_ctx.bw.dcn.watermarks.a.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_z8_ns = get_wm_z8_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_z8_ns = get_wm_z8_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b = context->bw_ctx.bw.dcn.watermarks.a; + context->bw_ctx.bw.dcn.watermarks.c = context->bw_ctx.bw.dcn.watermarks.a; + context->bw_ctx.bw.dcn.watermarks.d = context->bw_ctx.bw.dcn.watermarks.a; + + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + if (!context->res_ctx.pipe_ctx[i].stream) + continue; + + if (context->res_ctx.pipe_ctx[i].plane_state) + active_hubp_count++; + + pipes[pipe_idx].clks_cfg.dispclk_mhz = get_dispclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt); + pipes[pipe_idx].clks_cfg.dppclk_mhz = get_dppclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); + + if (dc->config.forced_clocks || dc->debug.max_disp_clk) { + pipes[pipe_idx].clks_cfg.dispclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dispclk_mhz; + pipes[pipe_idx].clks_cfg.dppclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dppclk_mhz; + } + if (dc->debug.min_disp_clk_khz > pipes[pipe_idx].clks_cfg.dispclk_mhz * 1000) + pipes[pipe_idx].clks_cfg.dispclk_mhz = dc->debug.min_disp_clk_khz / 1000.0; + if (dc->debug.min_dpp_clk_khz > pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000) + pipes[pipe_idx].clks_cfg.dppclk_mhz = dc->debug.min_dpp_clk_khz / 1000.0; + + pipe_idx++; + } + + dcn20_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel); + /* For 31x apu pstate change is only supported if possible in vactive*/ + context->bw_ctx.bw.dcn.clk.p_state_change_support = + context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] == dm_dram_clock_change_vactive; + /* If DCN isn't making memory requests we can allow pstate change and lower clocks */ + if (!active_hubp_count) { + context->bw_ctx.bw.dcn.clk.socclk_khz = 0; + context->bw_ctx.bw.dcn.clk.dppclk_khz = 0; + context->bw_ctx.bw.dcn.clk.dcfclk_khz = 0; + context->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz = 0; + context->bw_ctx.bw.dcn.clk.dramclk_khz = 0; + context->bw_ctx.bw.dcn.clk.fclk_khz = 0; + context->bw_ctx.bw.dcn.clk.p_state_change_support = true; + for (i = 0; i < dc->res_pool->pipe_count; i++) + if (context->res_ctx.pipe_ctx[i].stream) + context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz = 0; + } + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + if (!context->res_ctx.pipe_ctx[i].stream) + continue; + + context->res_ctx.pipe_ctx[i].det_buffer_size_kb = + get_det_buffer_size_kbytes(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); + if (context->res_ctx.pipe_ctx[i].det_buffer_size_kb > 384) + context->res_ctx.pipe_ctx[i].det_buffer_size_kb /= 2; + total_det += context->res_ctx.pipe_ctx[i].det_buffer_size_kb; + pipe_idx++; + } + context->bw_ctx.bw.dcn.compbuf_size_kb = context->bw_ctx.dml.ip.config_return_buffer_size_in_kbytes - total_det; +} + +void dcn31_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) +{ + struct _vcs_dpi_voltage_scaling_st *s = dc->scratch.update_bw_bounding_box.clock_limits; + struct clk_limit_table *clk_table = &bw_params->clk_table; + unsigned int i, closest_clk_lvl; + int j; + + dc_assert_fp_enabled(); + + memcpy(s, dcn3_1_soc.clock_limits, sizeof(dcn3_1_soc.clock_limits)); + + // Default clock levels are used for diags, which may lead to overclocking. + if (!IS_DIAG_DC(dc->ctx->dce_environment)) { + int max_dispclk_mhz = 0, max_dppclk_mhz = 0; + + dcn3_1_ip.max_num_otg = dc->res_pool->res_cap->num_timing_generator; + dcn3_1_ip.max_num_dpp = dc->res_pool->pipe_count; + dcn3_1_soc.num_chans = bw_params->num_channels; + + ASSERT(clk_table->num_entries); + + /* Prepass to find max clocks independent of voltage level. */ + for (i = 0; i < clk_table->num_entries; ++i) { + if (clk_table->entries[i].dispclk_mhz > max_dispclk_mhz) + max_dispclk_mhz = clk_table->entries[i].dispclk_mhz; + if (clk_table->entries[i].dppclk_mhz > max_dppclk_mhz) + max_dppclk_mhz = clk_table->entries[i].dppclk_mhz; + } + + for (i = 0; i < clk_table->num_entries; i++) { + /* loop backwards*/ + for (closest_clk_lvl = 0, j = dcn3_1_soc.num_states - 1; j >= 0; j--) { + if ((unsigned int) dcn3_1_soc.clock_limits[j].dcfclk_mhz <= clk_table->entries[i].dcfclk_mhz) { + closest_clk_lvl = j; + break; + } + } + + s[i].state = i; + + /* Clocks dependent on voltage level. */ + s[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; + s[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz; + s[i].socclk_mhz = clk_table->entries[i].socclk_mhz; + s[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * + 2 * clk_table->entries[i].wck_ratio; + + /* Clocks independent of voltage level. */ + s[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz : + dcn3_1_soc.clock_limits[closest_clk_lvl].dispclk_mhz; + + s[i].dppclk_mhz = max_dppclk_mhz ? max_dppclk_mhz : + dcn3_1_soc.clock_limits[closest_clk_lvl].dppclk_mhz; + + s[i].dram_bw_per_chan_gbps = + dcn3_1_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps; + s[i].dscclk_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].dscclk_mhz; + s[i].dtbclk_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].dtbclk_mhz; + s[i].phyclk_d18_mhz = + dcn3_1_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz; + s[i].phyclk_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].phyclk_mhz; + } + if (clk_table->num_entries) { + dcn3_1_soc.num_states = clk_table->num_entries; + } + } + + memcpy(dcn3_1_soc.clock_limits, s, sizeof(dcn3_1_soc.clock_limits)); + + dcn3_1_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; + dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; + + if ((int)(dcn3_1_soc.dram_clock_change_latency_us * 1000) + != dc->debug.dram_clock_change_latency_ns + && dc->debug.dram_clock_change_latency_ns) { + dcn3_1_soc.dram_clock_change_latency_us = dc->debug.dram_clock_change_latency_ns / 1000; + } + + if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) + dml_init_instance(&dc->dml, &dcn3_1_soc, &dcn3_1_ip, DML_PROJECT_DCN31); + else + dml_init_instance(&dc->dml, &dcn3_1_soc, &dcn3_1_ip, DML_PROJECT_DCN31_FPGA); +} + +void dcn315_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) +{ + struct clk_limit_table *clk_table = &bw_params->clk_table; + int i, max_dispclk_mhz = 0, max_dppclk_mhz = 0; + + dc_assert_fp_enabled(); + + dcn3_15_ip.max_num_otg = dc->res_pool->res_cap->num_timing_generator; + dcn3_15_ip.max_num_dpp = dc->res_pool->pipe_count; + + if (bw_params->num_channels > 0) + dcn3_15_soc.num_chans = bw_params->num_channels; + if (bw_params->dram_channel_width_bytes > 0) + dcn3_15_soc.dram_channel_width_bytes = bw_params->dram_channel_width_bytes; + + ASSERT(clk_table->num_entries); + + /* Setup soc to always use max dispclk/dppclk to avoid odm-to-lower-voltage */ + for (i = 0; i < clk_table->num_entries; ++i) { + if (clk_table->entries[i].dispclk_mhz > max_dispclk_mhz) + max_dispclk_mhz = clk_table->entries[i].dispclk_mhz; + if (clk_table->entries[i].dppclk_mhz > max_dppclk_mhz) + max_dppclk_mhz = clk_table->entries[i].dppclk_mhz; + } + + for (i = 0; i < clk_table->num_entries; i++) { + dcn3_15_soc.clock_limits[i].state = i; + + /* Clocks dependent on voltage level. */ + dcn3_15_soc.clock_limits[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; + dcn3_15_soc.clock_limits[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz; + dcn3_15_soc.clock_limits[i].socclk_mhz = clk_table->entries[i].socclk_mhz; + dcn3_15_soc.clock_limits[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2 * clk_table->entries[i].wck_ratio; + + /* These aren't actually read from smu, but rather set in clk_mgr defaults */ + dcn3_15_soc.clock_limits[i].dtbclk_mhz = clk_table->entries[i].dtbclk_mhz; + dcn3_15_soc.clock_limits[i].phyclk_d18_mhz = clk_table->entries[i].phyclk_d18_mhz; + dcn3_15_soc.clock_limits[i].phyclk_mhz = clk_table->entries[i].phyclk_mhz; + + /* Clocks independent of voltage level. */ + dcn3_15_soc.clock_limits[i].dispclk_mhz = max_dispclk_mhz; + dcn3_15_soc.clock_limits[i].dppclk_mhz = max_dppclk_mhz; + dcn3_15_soc.clock_limits[i].dscclk_mhz = max_dispclk_mhz / 3.0; + } + dcn3_15_soc.num_states = clk_table->num_entries; + + + /* Set vco to max_dispclk * 2 to make sure the highest dispclk is always available for dml calcs, + * no impact outside of dml validation + */ + dcn3_15_soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2; + + if ((int)(dcn3_15_soc.dram_clock_change_latency_us * 1000) + != dc->debug.dram_clock_change_latency_ns + && dc->debug.dram_clock_change_latency_ns) { + dcn3_15_soc.dram_clock_change_latency_us = dc->debug.dram_clock_change_latency_ns / 1000; + } + + if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) + dml_init_instance(&dc->dml, &dcn3_15_soc, &dcn3_15_ip, DML_PROJECT_DCN315); + else + dml_init_instance(&dc->dml, &dcn3_15_soc, &dcn3_15_ip, DML_PROJECT_DCN31_FPGA); +} + +void dcn316_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) +{ + struct _vcs_dpi_voltage_scaling_st *s = dc->scratch.update_bw_bounding_box.clock_limits; + struct clk_limit_table *clk_table = &bw_params->clk_table; + unsigned int i, closest_clk_lvl; + int max_dispclk_mhz = 0, max_dppclk_mhz = 0; + int j; + + dc_assert_fp_enabled(); + + memcpy(s, dcn3_16_soc.clock_limits, sizeof(dcn3_16_soc.clock_limits)); + + // Default clock levels are used for diags, which may lead to overclocking. + if (!IS_DIAG_DC(dc->ctx->dce_environment)) { + + dcn3_16_ip.max_num_otg = dc->res_pool->res_cap->num_timing_generator; + dcn3_16_ip.max_num_dpp = dc->res_pool->pipe_count; + dcn3_16_soc.num_chans = bw_params->num_channels; + + ASSERT(clk_table->num_entries); + + /* Prepass to find max clocks independent of voltage level. */ + for (i = 0; i < clk_table->num_entries; ++i) { + if (clk_table->entries[i].dispclk_mhz > max_dispclk_mhz) + max_dispclk_mhz = clk_table->entries[i].dispclk_mhz; + if (clk_table->entries[i].dppclk_mhz > max_dppclk_mhz) + max_dppclk_mhz = clk_table->entries[i].dppclk_mhz; + } + + for (i = 0; i < clk_table->num_entries; i++) { + /* loop backwards*/ + for (closest_clk_lvl = 0, j = dcn3_16_soc.num_states - 1; j >= 0; j--) { + if ((unsigned int) dcn3_16_soc.clock_limits[j].dcfclk_mhz <= + clk_table->entries[i].dcfclk_mhz) { + closest_clk_lvl = j; + break; + } + } + // Ported from DCN315 + if (clk_table->num_entries == 1) { + /*smu gives one DPM level, let's take the highest one*/ + closest_clk_lvl = dcn3_16_soc.num_states - 1; + } + + s[i].state = i; + + /* Clocks dependent on voltage level. */ + s[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; + if (clk_table->num_entries == 1 && + s[i].dcfclk_mhz < + dcn3_16_soc.clock_limits[closest_clk_lvl].dcfclk_mhz) { + /*SMU fix not released yet*/ + s[i].dcfclk_mhz = + dcn3_16_soc.clock_limits[closest_clk_lvl].dcfclk_mhz; + } + s[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz; + s[i].socclk_mhz = clk_table->entries[i].socclk_mhz; + s[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * + 2 * clk_table->entries[i].wck_ratio; + + /* Clocks independent of voltage level. */ + s[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz : + dcn3_16_soc.clock_limits[closest_clk_lvl].dispclk_mhz; + + s[i].dppclk_mhz = max_dppclk_mhz ? max_dppclk_mhz : + dcn3_16_soc.clock_limits[closest_clk_lvl].dppclk_mhz; + + s[i].dram_bw_per_chan_gbps = + dcn3_16_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps; + s[i].dscclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].dscclk_mhz; + s[i].dtbclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].dtbclk_mhz; + s[i].phyclk_d18_mhz = + dcn3_16_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz; + s[i].phyclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].phyclk_mhz; + } + if (clk_table->num_entries) { + dcn3_16_soc.num_states = clk_table->num_entries; + } + } + + memcpy(dcn3_16_soc.clock_limits, s, sizeof(dcn3_16_soc.clock_limits)); + + if (max_dispclk_mhz) { + dcn3_16_soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2; + dc->dml.soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2; + } + if ((int)(dcn3_16_soc.dram_clock_change_latency_us * 1000) + != dc->debug.dram_clock_change_latency_ns + && dc->debug.dram_clock_change_latency_ns) { + dcn3_16_soc.dram_clock_change_latency_us = dc->debug.dram_clock_change_latency_ns / 1000; + } + + if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) + dml_init_instance(&dc->dml, &dcn3_16_soc, &dcn3_16_ip, DML_PROJECT_DCN31); + else + dml_init_instance(&dc->dml, &dcn3_16_soc, &dcn3_16_ip, DML_PROJECT_DCN31_FPGA); +} + +int dcn_get_max_non_odm_pix_rate_100hz(struct _vcs_dpi_soc_bounding_box_st *soc) +{ + return soc->clock_limits[0].dispclk_mhz * 10000.0 / (1.0 + soc->dcn_downspread_percent / 100.0); +} + +int dcn_get_approx_det_segs_required_for_pstate( + struct _vcs_dpi_soc_bounding_box_st *soc, + int pix_clk_100hz, int bpp, int seg_size_kb) +{ + /* Roughly calculate required crb to hide latency. In practice there is slightly + * more buffer available for latency hiding + */ + return (int)(soc->dram_clock_change_latency_us * pix_clk_100hz * bpp + / 10240000 + seg_size_kb - 1) / seg_size_kb; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h new file mode 100644 index 000000000..99518f64d --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h @@ -0,0 +1,54 @@ +/* + * Copyright 2019-2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DCN31_FPU_H__ +#define __DCN31_FPU_H__ + +#define DCN3_1_DEFAULT_DET_SIZE 384 +#define DCN3_15_DEFAULT_DET_SIZE 192 +#define DCN3_15_MIN_COMPBUF_SIZE_KB 128 +#define DCN3_16_DEFAULT_DET_SIZE 192 + +void dcn31_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes, + int pipe_cnt); + +void dcn31_update_soc_for_wm_a(struct dc *dc, struct dc_state *context); +void dcn315_update_soc_for_wm_a(struct dc *dc, struct dc_state *context); + +void dcn31_calculate_wm_and_dlg_fp( + struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt, + int vlevel); + +void dcn31_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params); +void dcn315_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params); +void dcn316_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params); +int dcn_get_max_non_odm_pix_rate_100hz(struct _vcs_dpi_soc_bounding_box_st *soc); +int dcn_get_approx_det_segs_required_for_pstate( + struct _vcs_dpi_soc_bounding_box_st *soc, + int pix_clk_100hz, int bpp, int seg_size_kb); + +#endif /* __DCN31_FPU_H__*/ diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c new file mode 100644 index 000000000..9c84561ff --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c @@ -0,0 +1,7186 @@ +/* + * Copyright 2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#include "dc.h" +#include "dc_link.h" +#include "../display_mode_lib.h" +#include "../dcn30/display_mode_vba_30.h" +#include "display_mode_vba_31.h" +#include "../dml_inline_defs.h" + +/* + * NOTE: + * This file is gcc-parsable HW gospel, coming straight from HW engineers. + * + * It doesn't adhere to Linux kernel style and sometimes will do things in odd + * ways. Unless there is something clearly wrong with it the code should + * remain as-is as it provides us with a guarantee from HW that it is correct. + */ + +#define BPP_INVALID 0 +#define BPP_BLENDED_PIPE 0xffffffff +#define DCN31_MAX_DSC_IMAGE_WIDTH 5184 +#define DCN31_MAX_FMT_420_BUFFER_WIDTH 4096 +#define DCN3_15_MIN_COMPBUF_SIZE_KB 128 +#define DCN3_15_MAX_DET_SIZE 384 + +// For DML-C changes that hasn't been propagated to VBA yet +//#define __DML_VBA_ALLOW_DELTA__ + +// Move these to ip paramaters/constant + +// At which vstartup the DML start to try if the mode can be supported +#define __DML_VBA_MIN_VSTARTUP__ 9 + +// Delay in DCFCLK from ARB to DET (1st num is ARB to SDPIF, 2nd number is SDPIF to DET) +#define __DML_ARB_TO_RET_DELAY__ (7 + 95) + +// fudge factor for min dcfclk calclation +#define __DML_MIN_DCFCLK_FACTOR__ 1.15 + +typedef struct { + double DPPCLK; + double DISPCLK; + double PixelClock; + double DCFCLKDeepSleep; + unsigned int DPPPerPlane; + bool ScalerEnabled; + double VRatio; + double VRatioChroma; + enum scan_direction_class SourceScan; + unsigned int BlockWidth256BytesY; + unsigned int BlockHeight256BytesY; + unsigned int BlockWidth256BytesC; + unsigned int BlockHeight256BytesC; + unsigned int InterlaceEnable; + unsigned int NumberOfCursors; + unsigned int VBlank; + unsigned int HTotal; + unsigned int DCCEnable; + bool ODMCombineIsEnabled; + enum source_format_class SourcePixelFormat; + int BytePerPixelY; + int BytePerPixelC; + bool ProgressiveToInterlaceUnitInOPP; +} Pipe; + +#define BPP_INVALID 0 +#define BPP_BLENDED_PIPE 0xffffffff + +static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib); +static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib); +static unsigned int dscceComputeDelay( + unsigned int bpc, + double BPP, + unsigned int sliceWidth, + unsigned int numSlices, + enum output_format_class pixelFormat, + enum output_encoder_class Output); +static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output); +static bool CalculatePrefetchSchedule( + struct display_mode_lib *mode_lib, + double HostVMInefficiencyFactor, + Pipe *myPipe, + unsigned int DSCDelay, + double DPPCLKDelaySubtotalPlusCNVCFormater, + double DPPCLKDelaySCL, + double DPPCLKDelaySCLLBOnly, + double DPPCLKDelayCNVCCursor, + double DISPCLKDelaySubtotal, + unsigned int DPP_RECOUT_WIDTH, + enum output_format_class OutputFormat, + unsigned int MaxInterDCNTileRepeaters, + unsigned int VStartup, + unsigned int MaxVStartup, + unsigned int GPUVMPageTableLevels, + bool GPUVMEnable, + bool HostVMEnable, + unsigned int HostVMMaxNonCachedPageTableLevels, + double HostVMMinPageSize, + bool DynamicMetadataEnable, + bool DynamicMetadataVMEnabled, + int DynamicMetadataLinesBeforeActiveRequired, + unsigned int DynamicMetadataTransmittedBytes, + double UrgentLatency, + double UrgentExtraLatency, + double TCalc, + unsigned int PDEAndMetaPTEBytesFrame, + unsigned int MetaRowByte, + unsigned int PixelPTEBytesPerRow, + double PrefetchSourceLinesY, + unsigned int SwathWidthY, + double VInitPreFillY, + unsigned int MaxNumSwathY, + double PrefetchSourceLinesC, + unsigned int SwathWidthC, + double VInitPreFillC, + unsigned int MaxNumSwathC, + int swath_width_luma_ub, + int swath_width_chroma_ub, + unsigned int SwathHeightY, + unsigned int SwathHeightC, + double TWait, + double *DSTXAfterScaler, + double *DSTYAfterScaler, + double *DestinationLinesForPrefetch, + double *PrefetchBandwidth, + double *DestinationLinesToRequestVMInVBlank, + double *DestinationLinesToRequestRowInVBlank, + double *VRatioPrefetchY, + double *VRatioPrefetchC, + double *RequiredPrefetchPixDataBWLuma, + double *RequiredPrefetchPixDataBWChroma, + bool *NotEnoughTimeForDynamicMetadata, + double *Tno_bw, + double *prefetch_vmrow_bw, + double *Tdmdl_vm, + double *Tdmdl, + double *TSetup, + int *VUpdateOffsetPix, + double *VUpdateWidthPix, + double *VReadyOffsetPix); +static double RoundToDFSGranularityUp(double Clock, double VCOSpeed); +static double RoundToDFSGranularityDown(double Clock, double VCOSpeed); +static void CalculateDCCConfiguration( + bool DCCEnabled, + bool DCCProgrammingAssumesScanDirectionUnknown, + enum source_format_class SourcePixelFormat, + unsigned int SurfaceWidthLuma, + unsigned int SurfaceWidthChroma, + unsigned int SurfaceHeightLuma, + unsigned int SurfaceHeightChroma, + double DETBufferSize, + unsigned int RequestHeight256ByteLuma, + unsigned int RequestHeight256ByteChroma, + enum dm_swizzle_mode TilingFormat, + unsigned int BytePerPixelY, + unsigned int BytePerPixelC, + double BytePerPixelDETY, + double BytePerPixelDETC, + enum scan_direction_class ScanOrientation, + unsigned int *MaxUncompressedBlockLuma, + unsigned int *MaxUncompressedBlockChroma, + unsigned int *MaxCompressedBlockLuma, + unsigned int *MaxCompressedBlockChroma, + unsigned int *IndependentBlockLuma, + unsigned int *IndependentBlockChroma); +static double CalculatePrefetchSourceLines( + struct display_mode_lib *mode_lib, + double VRatio, + double vtaps, + bool Interlace, + bool ProgressiveToInterlaceUnitInOPP, + unsigned int SwathHeight, + unsigned int ViewportYStart, + double *VInitPreFill, + unsigned int *MaxNumSwath); +static unsigned int CalculateVMAndRowBytes( + struct display_mode_lib *mode_lib, + bool DCCEnable, + unsigned int BlockHeight256Bytes, + unsigned int BlockWidth256Bytes, + enum source_format_class SourcePixelFormat, + unsigned int SurfaceTiling, + unsigned int BytePerPixel, + enum scan_direction_class ScanDirection, + unsigned int SwathWidth, + unsigned int ViewportHeight, + bool GPUVMEnable, + bool HostVMEnable, + unsigned int HostVMMaxNonCachedPageTableLevels, + unsigned int GPUVMMinPageSize, + unsigned int HostVMMinPageSize, + unsigned int PTEBufferSizeInRequests, + unsigned int Pitch, + unsigned int DCCMetaPitch, + unsigned int *MacroTileWidth, + unsigned int *MetaRowByte, + unsigned int *PixelPTEBytesPerRow, + bool *PTEBufferSizeNotExceeded, + int *dpte_row_width_ub, + unsigned int *dpte_row_height, + unsigned int *MetaRequestWidth, + unsigned int *MetaRequestHeight, + unsigned int *meta_row_width, + unsigned int *meta_row_height, + int *vm_group_bytes, + unsigned int *dpte_group_bytes, + unsigned int *PixelPTEReqWidth, + unsigned int *PixelPTEReqHeight, + unsigned int *PTERequestSize, + int *DPDE0BytesFrame, + int *MetaPTEBytesFrame); +static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime); +static void CalculateRowBandwidth( + bool GPUVMEnable, + enum source_format_class SourcePixelFormat, + double VRatio, + double VRatioChroma, + bool DCCEnable, + double LineTime, + unsigned int MetaRowByteLuma, + unsigned int MetaRowByteChroma, + unsigned int meta_row_height_luma, + unsigned int meta_row_height_chroma, + unsigned int PixelPTEBytesPerRowLuma, + unsigned int PixelPTEBytesPerRowChroma, + unsigned int dpte_row_height_luma, + unsigned int dpte_row_height_chroma, + double *meta_row_bw, + double *dpte_row_bw); + +static void CalculateFlipSchedule( + struct display_mode_lib *mode_lib, + unsigned int k, + double HostVMInefficiencyFactor, + double UrgentExtraLatency, + double UrgentLatency, + double PDEAndMetaPTEBytesPerFrame, + double MetaRowBytes, + double DPTEBytesPerRow); +static double CalculateWriteBackDelay( + enum source_format_class WritebackPixelFormat, + double WritebackHRatio, + double WritebackVRatio, + unsigned int WritebackVTaps, + int WritebackDestinationWidth, + int WritebackDestinationHeight, + int WritebackSourceHeight, + unsigned int HTotal); + +static void CalculateVupdateAndDynamicMetadataParameters( + int MaxInterDCNTileRepeaters, + double DPPCLK, + double DISPCLK, + double DCFClkDeepSleep, + double PixelClock, + int HTotal, + int VBlank, + int DynamicMetadataTransmittedBytes, + int DynamicMetadataLinesBeforeActiveRequired, + int InterlaceEnable, + bool ProgressiveToInterlaceUnitInOPP, + double *TSetup, + double *Tdmbf, + double *Tdmec, + double *Tdmsks, + int *VUpdateOffsetPix, + double *VUpdateWidthPix, + double *VReadyOffsetPix); + +static void CalculateWatermarksAndDRAMSpeedChangeSupport( + struct display_mode_lib *mode_lib, + unsigned int PrefetchMode, + double DCFCLK, + double ReturnBW, + double UrgentLatency, + double ExtraLatency, + double SOCCLK, + double DCFCLKDeepSleep, + unsigned int DETBufferSizeY[], + unsigned int DETBufferSizeC[], + unsigned int SwathHeightY[], + unsigned int SwathHeightC[], + double SwathWidthY[], + double SwathWidthC[], + unsigned int DPPPerPlane[], + double BytePerPixelDETY[], + double BytePerPixelDETC[], + bool UnboundedRequestEnabled, + int unsigned CompressedBufferSizeInkByte, + enum clock_change_support *DRAMClockChangeSupport, + double *StutterExitWatermark, + double *StutterEnterPlusExitWatermark, + double *Z8StutterExitWatermark, + double *Z8StutterEnterPlusExitWatermark); + +static void CalculateDCFCLKDeepSleep( + struct display_mode_lib *mode_lib, + unsigned int NumberOfActivePlanes, + int BytePerPixelY[], + int BytePerPixelC[], + double VRatio[], + double VRatioChroma[], + double SwathWidthY[], + double SwathWidthC[], + unsigned int DPPPerPlane[], + double HRatio[], + double HRatioChroma[], + double PixelClock[], + double PSCL_THROUGHPUT[], + double PSCL_THROUGHPUT_CHROMA[], + double DPPCLK[], + double ReadBandwidthLuma[], + double ReadBandwidthChroma[], + int ReturnBusWidth, + double *DCFCLKDeepSleep); + +static void CalculateUrgentBurstFactor( + int swath_width_luma_ub, + int swath_width_chroma_ub, + unsigned int SwathHeightY, + unsigned int SwathHeightC, + double LineTime, + double UrgentLatency, + double CursorBufferSize, + unsigned int CursorWidth, + unsigned int CursorBPP, + double VRatio, + double VRatioC, + double BytePerPixelInDETY, + double BytePerPixelInDETC, + double DETBufferSizeY, + double DETBufferSizeC, + double *UrgentBurstFactorCursor, + double *UrgentBurstFactorLuma, + double *UrgentBurstFactorChroma, + bool *NotEnoughUrgentLatencyHiding); + +static void UseMinimumDCFCLK( + struct display_mode_lib *mode_lib, + int MaxPrefetchMode, + int ReorderingBytes); + +static void CalculatePixelDeliveryTimes( + unsigned int NumberOfActivePlanes, + double VRatio[], + double VRatioChroma[], + double VRatioPrefetchY[], + double VRatioPrefetchC[], + unsigned int swath_width_luma_ub[], + unsigned int swath_width_chroma_ub[], + unsigned int DPPPerPlane[], + double HRatio[], + double HRatioChroma[], + double PixelClock[], + double PSCL_THROUGHPUT[], + double PSCL_THROUGHPUT_CHROMA[], + double DPPCLK[], + int BytePerPixelC[], + enum scan_direction_class SourceScan[], + unsigned int NumberOfCursors[], + unsigned int CursorWidth[][DC__NUM_CURSOR__MAX], + unsigned int CursorBPP[][DC__NUM_CURSOR__MAX], + unsigned int BlockWidth256BytesY[], + unsigned int BlockHeight256BytesY[], + unsigned int BlockWidth256BytesC[], + unsigned int BlockHeight256BytesC[], + double DisplayPipeLineDeliveryTimeLuma[], + double DisplayPipeLineDeliveryTimeChroma[], + double DisplayPipeLineDeliveryTimeLumaPrefetch[], + double DisplayPipeLineDeliveryTimeChromaPrefetch[], + double DisplayPipeRequestDeliveryTimeLuma[], + double DisplayPipeRequestDeliveryTimeChroma[], + double DisplayPipeRequestDeliveryTimeLumaPrefetch[], + double DisplayPipeRequestDeliveryTimeChromaPrefetch[], + double CursorRequestDeliveryTime[], + double CursorRequestDeliveryTimePrefetch[]); + +static void CalculateMetaAndPTETimes( + int NumberOfActivePlanes, + bool GPUVMEnable, + int MetaChunkSize, + int MinMetaChunkSizeBytes, + int HTotal[], + double VRatio[], + double VRatioChroma[], + double DestinationLinesToRequestRowInVBlank[], + double DestinationLinesToRequestRowInImmediateFlip[], + bool DCCEnable[], + double PixelClock[], + int BytePerPixelY[], + int BytePerPixelC[], + enum scan_direction_class SourceScan[], + int dpte_row_height[], + int dpte_row_height_chroma[], + int meta_row_width[], + int meta_row_width_chroma[], + int meta_row_height[], + int meta_row_height_chroma[], + int meta_req_width[], + int meta_req_width_chroma[], + int meta_req_height[], + int meta_req_height_chroma[], + int dpte_group_bytes[], + int PTERequestSizeY[], + int PTERequestSizeC[], + int PixelPTEReqWidthY[], + int PixelPTEReqHeightY[], + int PixelPTEReqWidthC[], + int PixelPTEReqHeightC[], + int dpte_row_width_luma_ub[], + int dpte_row_width_chroma_ub[], + double DST_Y_PER_PTE_ROW_NOM_L[], + double DST_Y_PER_PTE_ROW_NOM_C[], + double DST_Y_PER_META_ROW_NOM_L[], + double DST_Y_PER_META_ROW_NOM_C[], + double TimePerMetaChunkNominal[], + double TimePerChromaMetaChunkNominal[], + double TimePerMetaChunkVBlank[], + double TimePerChromaMetaChunkVBlank[], + double TimePerMetaChunkFlip[], + double TimePerChromaMetaChunkFlip[], + double time_per_pte_group_nom_luma[], + double time_per_pte_group_vblank_luma[], + double time_per_pte_group_flip_luma[], + double time_per_pte_group_nom_chroma[], + double time_per_pte_group_vblank_chroma[], + double time_per_pte_group_flip_chroma[]); + +static void CalculateVMGroupAndRequestTimes( + unsigned int NumberOfActivePlanes, + bool GPUVMEnable, + unsigned int GPUVMMaxPageTableLevels, + unsigned int HTotal[], + int BytePerPixelC[], + double DestinationLinesToRequestVMInVBlank[], + double DestinationLinesToRequestVMInImmediateFlip[], + bool DCCEnable[], + double PixelClock[], + int dpte_row_width_luma_ub[], + int dpte_row_width_chroma_ub[], + int vm_group_bytes[], + unsigned int dpde0_bytes_per_frame_ub_l[], + unsigned int dpde0_bytes_per_frame_ub_c[], + int meta_pte_bytes_per_frame_ub_l[], + int meta_pte_bytes_per_frame_ub_c[], + double TimePerVMGroupVBlank[], + double TimePerVMGroupFlip[], + double TimePerVMRequestVBlank[], + double TimePerVMRequestFlip[]); + +static void CalculateStutterEfficiency( + struct display_mode_lib *mode_lib, + int CompressedBufferSizeInkByte, + bool UnboundedRequestEnabled, + int ConfigReturnBufferSizeInKByte, + int MetaFIFOSizeInKEntries, + int ZeroSizeBufferEntries, + int NumberOfActivePlanes, + int ROBBufferSizeInKByte, + double TotalDataReadBandwidth, + double DCFCLK, + double ReturnBW, + double COMPBUF_RESERVED_SPACE_64B, + double COMPBUF_RESERVED_SPACE_ZS, + double SRExitTime, + double SRExitZ8Time, + bool SynchronizedVBlank, + double Z8StutterEnterPlusExitWatermark, + double StutterEnterPlusExitWatermark, + bool ProgressiveToInterlaceUnitInOPP, + bool Interlace[], + double MinTTUVBlank[], + int DPPPerPlane[], + unsigned int DETBufferSizeY[], + int BytePerPixelY[], + double BytePerPixelDETY[], + double SwathWidthY[], + int SwathHeightY[], + int SwathHeightC[], + double NetDCCRateLuma[], + double NetDCCRateChroma[], + double DCCFractionOfZeroSizeRequestsLuma[], + double DCCFractionOfZeroSizeRequestsChroma[], + int HTotal[], + int VTotal[], + double PixelClock[], + double VRatio[], + enum scan_direction_class SourceScan[], + int BlockHeight256BytesY[], + int BlockWidth256BytesY[], + int BlockHeight256BytesC[], + int BlockWidth256BytesC[], + int DCCYMaxUncompressedBlock[], + int DCCCMaxUncompressedBlock[], + int VActive[], + bool DCCEnable[], + bool WritebackEnable[], + double ReadBandwidthPlaneLuma[], + double ReadBandwidthPlaneChroma[], + double meta_row_bw[], + double dpte_row_bw[], + double *StutterEfficiencyNotIncludingVBlank, + double *StutterEfficiency, + int *NumberOfStutterBurstsPerFrame, + double *Z8StutterEfficiencyNotIncludingVBlank, + double *Z8StutterEfficiency, + int *Z8NumberOfStutterBurstsPerFrame, + double *StutterPeriod); + +static void CalculateSwathAndDETConfiguration( + bool ForceSingleDPP, + int NumberOfActivePlanes, + bool DETSharedByAllDPP, + unsigned int DETBufferSizeInKByte[], + double MaximumSwathWidthLuma[], + double MaximumSwathWidthChroma[], + enum scan_direction_class SourceScan[], + enum source_format_class SourcePixelFormat[], + enum dm_swizzle_mode SurfaceTiling[], + int ViewportWidth[], + int ViewportHeight[], + int SurfaceWidthY[], + int SurfaceWidthC[], + int SurfaceHeightY[], + int SurfaceHeightC[], + int Read256BytesBlockHeightY[], + int Read256BytesBlockHeightC[], + int Read256BytesBlockWidthY[], + int Read256BytesBlockWidthC[], + enum odm_combine_mode ODMCombineEnabled[], + int BlendingAndTiming[], + int BytePerPixY[], + int BytePerPixC[], + double BytePerPixDETY[], + double BytePerPixDETC[], + int HActive[], + double HRatio[], + double HRatioChroma[], + int DPPPerPlane[], + int swath_width_luma_ub[], + int swath_width_chroma_ub[], + double SwathWidth[], + double SwathWidthChroma[], + int SwathHeightY[], + int SwathHeightC[], + unsigned int DETBufferSizeY[], + unsigned int DETBufferSizeC[], + bool ViewportSizeSupportPerPlane[], + bool *ViewportSizeSupport); +static void CalculateSwathWidth( + bool ForceSingleDPP, + int NumberOfActivePlanes, + enum source_format_class SourcePixelFormat[], + enum scan_direction_class SourceScan[], + int ViewportWidth[], + int ViewportHeight[], + int SurfaceWidthY[], + int SurfaceWidthC[], + int SurfaceHeightY[], + int SurfaceHeightC[], + enum odm_combine_mode ODMCombineEnabled[], + int BytePerPixY[], + int BytePerPixC[], + int Read256BytesBlockHeightY[], + int Read256BytesBlockHeightC[], + int Read256BytesBlockWidthY[], + int Read256BytesBlockWidthC[], + int BlendingAndTiming[], + int HActive[], + double HRatio[], + int DPPPerPlane[], + double SwathWidthSingleDPPY[], + double SwathWidthSingleDPPC[], + double SwathWidthY[], + double SwathWidthC[], + int MaximumSwathHeightY[], + int MaximumSwathHeightC[], + int swath_width_luma_ub[], + int swath_width_chroma_ub[]); + +static double CalculateExtraLatency( + int RoundTripPingLatencyCycles, + int ReorderingBytes, + double DCFCLK, + int TotalNumberOfActiveDPP, + int PixelChunkSizeInKByte, + int TotalNumberOfDCCActiveDPP, + int MetaChunkSize, + double ReturnBW, + bool GPUVMEnable, + bool HostVMEnable, + int NumberOfActivePlanes, + int NumberOfDPP[], + int dpte_group_bytes[], + double HostVMInefficiencyFactor, + double HostVMMinPageSize, + int HostVMMaxNonCachedPageTableLevels); + +static double CalculateExtraLatencyBytes( + int ReorderingBytes, + int TotalNumberOfActiveDPP, + int PixelChunkSizeInKByte, + int TotalNumberOfDCCActiveDPP, + int MetaChunkSize, + bool GPUVMEnable, + bool HostVMEnable, + int NumberOfActivePlanes, + int NumberOfDPP[], + int dpte_group_bytes[], + double HostVMInefficiencyFactor, + double HostVMMinPageSize, + int HostVMMaxNonCachedPageTableLevels); + +static double CalculateUrgentLatency( + double UrgentLatencyPixelDataOnly, + double UrgentLatencyPixelMixedWithVMData, + double UrgentLatencyVMDataOnly, + bool DoUrgentLatencyAdjustment, + double UrgentLatencyAdjustmentFabricClockComponent, + double UrgentLatencyAdjustmentFabricClockReference, + double FabricClockSingle); + +static void CalculateUnboundedRequestAndCompressedBufferSize( + unsigned int DETBufferSizeInKByte, + int ConfigReturnBufferSizeInKByte, + enum unbounded_requesting_policy UseUnboundedRequestingFinal, + int TotalActiveDPP, + bool NoChromaPlanes, + int MaxNumDPP, + int CompressedBufferSegmentSizeInkByteFinal, + enum output_encoder_class *Output, + bool *UnboundedRequestEnabled, + int *CompressedBufferSizeInkByte); + +static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output); + +void dml31_recalculate(struct display_mode_lib *mode_lib) +{ + ModeSupportAndSystemConfiguration(mode_lib); + PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib); + DisplayPipeConfiguration(mode_lib); +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: Calling DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation\n", __func__); +#endif + DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib); +} + +static unsigned int dscceComputeDelay( + unsigned int bpc, + double BPP, + unsigned int sliceWidth, + unsigned int numSlices, + enum output_format_class pixelFormat, + enum output_encoder_class Output) +{ + // valid bpc = source bits per component in the set of {8, 10, 12} + // valid bpp = increments of 1/16 of a bit + // min = 6/7/8 in N420/N422/444, respectively + // max = such that compression is 1:1 + //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode) + //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4} + //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420} + + // fixed value + unsigned int rcModelSize = 8192; + + // N422/N420 operate at 2 pixels per clock + unsigned int pixelsPerClock = 0, lstall, D, initalXmitDelay, w, s, ix, wx, P, l0, a, ax, L, Delay, pixels; + + if (pixelFormat == dm_420) + pixelsPerClock = 2; + else if (pixelFormat == dm_444) + pixelsPerClock = 1; + else if (pixelFormat == dm_n422) + pixelsPerClock = 2; + // #all other modes operate at 1 pixel per clock + else + pixelsPerClock = 1; + + //initial transmit delay as per PPS + initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock); + + //compute ssm delay + if (bpc == 8) + D = 81; + else if (bpc == 10) + D = 89; + else + D = 113; + + //divide by pixel per cycle to compute slice width as seen by DSC + w = sliceWidth / pixelsPerClock; + + //422 mode has an additional cycle of delay + if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422) + s = 0; + else + s = 1; + + //main calculation for the dscce + ix = initalXmitDelay + 45; + wx = (w + 2) / 3; + P = 3 * wx - w; + l0 = ix / w; + a = ix + P * l0; + ax = (a + 2) / 3 + D + 6 + 1; + L = (ax + wx - 1) / wx; + if ((ix % w) == 0 && P != 0) + lstall = 1; + else + lstall = 0; + Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22; + + //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels + pixels = Delay * 3 * pixelsPerClock; + return pixels; +} + +static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output) +{ + unsigned int Delay = 0; + + if (pixelFormat == dm_420) { + // sfr + Delay = Delay + 2; + // dsccif + Delay = Delay + 0; + // dscc - input deserializer + Delay = Delay + 3; + // dscc gets pixels every other cycle + Delay = Delay + 2; + // dscc - input cdc fifo + Delay = Delay + 12; + // dscc gets pixels every other cycle + Delay = Delay + 13; + // dscc - cdc uncertainty + Delay = Delay + 2; + // dscc - output cdc fifo + Delay = Delay + 7; + // dscc gets pixels every other cycle + Delay = Delay + 3; + // dscc - cdc uncertainty + Delay = Delay + 2; + // dscc - output serializer + Delay = Delay + 1; + // sft + Delay = Delay + 1; + } else if (pixelFormat == dm_n422) { + // sfr + Delay = Delay + 2; + // dsccif + Delay = Delay + 1; + // dscc - input deserializer + Delay = Delay + 5; + // dscc - input cdc fifo + Delay = Delay + 25; + // dscc - cdc uncertainty + Delay = Delay + 2; + // dscc - output cdc fifo + Delay = Delay + 10; + // dscc - cdc uncertainty + Delay = Delay + 2; + // dscc - output serializer + Delay = Delay + 1; + // sft + Delay = Delay + 1; + } else { + // sfr + Delay = Delay + 2; + // dsccif + Delay = Delay + 0; + // dscc - input deserializer + Delay = Delay + 3; + // dscc - input cdc fifo + Delay = Delay + 12; + // dscc - cdc uncertainty + Delay = Delay + 2; + // dscc - output cdc fifo + Delay = Delay + 7; + // dscc - output serializer + Delay = Delay + 1; + // dscc - cdc uncertainty + Delay = Delay + 2; + // sft + Delay = Delay + 1; + } + + return Delay; +} + +static bool CalculatePrefetchSchedule( + struct display_mode_lib *mode_lib, + double HostVMInefficiencyFactor, + Pipe *myPipe, + unsigned int DSCDelay, + double DPPCLKDelaySubtotalPlusCNVCFormater, + double DPPCLKDelaySCL, + double DPPCLKDelaySCLLBOnly, + double DPPCLKDelayCNVCCursor, + double DISPCLKDelaySubtotal, + unsigned int DPP_RECOUT_WIDTH, + enum output_format_class OutputFormat, + unsigned int MaxInterDCNTileRepeaters, + unsigned int VStartup, + unsigned int MaxVStartup, + unsigned int GPUVMPageTableLevels, + bool GPUVMEnable, + bool HostVMEnable, + unsigned int HostVMMaxNonCachedPageTableLevels, + double HostVMMinPageSize, + bool DynamicMetadataEnable, + bool DynamicMetadataVMEnabled, + int DynamicMetadataLinesBeforeActiveRequired, + unsigned int DynamicMetadataTransmittedBytes, + double UrgentLatency, + double UrgentExtraLatency, + double TCalc, + unsigned int PDEAndMetaPTEBytesFrame, + unsigned int MetaRowByte, + unsigned int PixelPTEBytesPerRow, + double PrefetchSourceLinesY, + unsigned int SwathWidthY, + double VInitPreFillY, + unsigned int MaxNumSwathY, + double PrefetchSourceLinesC, + unsigned int SwathWidthC, + double VInitPreFillC, + unsigned int MaxNumSwathC, + int swath_width_luma_ub, + int swath_width_chroma_ub, + unsigned int SwathHeightY, + unsigned int SwathHeightC, + double TWait, + double *DSTXAfterScaler, + double *DSTYAfterScaler, + double *DestinationLinesForPrefetch, + double *PrefetchBandwidth, + double *DestinationLinesToRequestVMInVBlank, + double *DestinationLinesToRequestRowInVBlank, + double *VRatioPrefetchY, + double *VRatioPrefetchC, + double *RequiredPrefetchPixDataBWLuma, + double *RequiredPrefetchPixDataBWChroma, + bool *NotEnoughTimeForDynamicMetadata, + double *Tno_bw, + double *prefetch_vmrow_bw, + double *Tdmdl_vm, + double *Tdmdl, + double *TSetup, + int *VUpdateOffsetPix, + double *VUpdateWidthPix, + double *VReadyOffsetPix) +{ + bool MyError = false; + unsigned int DPPCycles, DISPCLKCycles; + double DSTTotalPixelsAfterScaler; + double LineTime; + double dst_y_prefetch_equ; + double Tsw_oto; + double prefetch_bw_oto; + double prefetch_bw_pr; + double Tvm_oto; + double Tr0_oto; + double Tvm_oto_lines; + double Tr0_oto_lines; + double dst_y_prefetch_oto; + double TimeForFetchingMetaPTE = 0; + double TimeForFetchingRowInVBlank = 0; + double LinesToRequestPrefetchPixelData = 0; + unsigned int HostVMDynamicLevelsTrips; + double trip_to_mem; + double Tvm_trips; + double Tr0_trips; + double Tvm_trips_rounded; + double Tr0_trips_rounded; + double Lsw_oto; + double Tpre_rounded; + double prefetch_bw_equ; + double Tvm_equ; + double Tr0_equ; + double Tdmbf; + double Tdmec; + double Tdmsks; + double prefetch_sw_bytes; + double bytes_pp; + double dep_bytes; + int max_vratio_pre = 4; + double min_Lsw; + double Tsw_est1 = 0; + double Tsw_est3 = 0; + double max_Tsw = 0; + + if (GPUVMEnable == true && HostVMEnable == true) { + HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels; + } else { + HostVMDynamicLevelsTrips = 0; + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: GPUVMEnable=%d HostVMEnable=%d HostVMInefficiencyFactor=%f\n", __func__, GPUVMEnable, HostVMEnable, HostVMInefficiencyFactor); +#endif + CalculateVupdateAndDynamicMetadataParameters( + MaxInterDCNTileRepeaters, + myPipe->DPPCLK, + myPipe->DISPCLK, + myPipe->DCFCLKDeepSleep, + myPipe->PixelClock, + myPipe->HTotal, + myPipe->VBlank, + DynamicMetadataTransmittedBytes, + DynamicMetadataLinesBeforeActiveRequired, + myPipe->InterlaceEnable, + myPipe->ProgressiveToInterlaceUnitInOPP, + TSetup, + &Tdmbf, + &Tdmec, + &Tdmsks, + VUpdateOffsetPix, + VUpdateWidthPix, + VReadyOffsetPix); + + LineTime = myPipe->HTotal / myPipe->PixelClock; + trip_to_mem = UrgentLatency; + Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1); + +#ifdef __DML_VBA_ALLOW_DELTA__ + if (DynamicMetadataVMEnabled == true && GPUVMEnable == true) { +#else + if (DynamicMetadataVMEnabled == true) { +#endif + *Tdmdl = TWait + Tvm_trips + trip_to_mem; + } else { + *Tdmdl = TWait + UrgentExtraLatency; + } + +#ifdef __DML_VBA_ALLOW_DELTA__ + if (DynamicMetadataEnable == false) { + *Tdmdl = 0.0; + } +#endif + + if (DynamicMetadataEnable == true) { + if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) { + *NotEnoughTimeForDynamicMetadata = true; + dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__); + dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf); + dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec); + dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, Tdmsks); + dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *Tdmdl); + } else { + *NotEnoughTimeForDynamicMetadata = false; + } + } else { + *NotEnoughTimeForDynamicMetadata = false; + } + + *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true && GPUVMEnable == true ? TWait + Tvm_trips : 0); + + if (myPipe->ScalerEnabled) + DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL; + else + DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly; + + DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor; + + DISPCLKCycles = DISPCLKDelaySubtotal; + + if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0) + return true; + + *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK + DSCDelay; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles); + dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock); + dml_print("DML::%s: DPPCLK: %f\n", __func__, myPipe->DPPCLK); + dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles); + dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->DISPCLK); + dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay); + dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__, *DSTXAfterScaler); + dml_print("DML::%s: ODMCombineIsEnabled: %d\n", __func__, myPipe->ODMCombineIsEnabled); +#endif + + *DSTXAfterScaler = *DSTXAfterScaler + ((myPipe->ODMCombineIsEnabled) ? 18 : 0) + (myPipe->DPPPerPlane - 1) * DPP_RECOUT_WIDTH; + + if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP)) + *DSTYAfterScaler = 1; + else + *DSTYAfterScaler = 0; + + DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler; + *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1); + *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal)); + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler); +#endif + + MyError = false; + + Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1); + Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1) / 4 * LineTime; + Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1) / 4 * LineTime; + +#ifdef __DML_VBA_ALLOW_DELTA__ + if (!myPipe->DCCEnable) { + Tr0_trips = 0.0; + Tr0_trips_rounded = 0.0; + } +#endif + + if (!GPUVMEnable) { + Tvm_trips = 0.0; + Tvm_trips_rounded = 0.0; + } + + if (GPUVMEnable) { + if (GPUVMPageTableLevels >= 3) { + *Tno_bw = UrgentExtraLatency + trip_to_mem * ((GPUVMPageTableLevels - 2) - 1); + } else { + *Tno_bw = 0; + } + } else if (!myPipe->DCCEnable) { + *Tno_bw = LineTime; + } else { + *Tno_bw = LineTime / 4; + } + + if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10 || myPipe->SourcePixelFormat == dm_420_12) + bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4; + else + bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC; + /*rev 99*/ + prefetch_bw_pr = dml_min(1, bytes_pp * myPipe->PixelClock / (double) myPipe->DPPPerPlane); + max_Tsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime; + prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC; + prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerPlane, prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime)); + prefetch_bw_oto = dml_max(prefetch_bw_pr, prefetch_sw_bytes / max_Tsw); + + min_Lsw = dml_max(1, dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre); + Lsw_oto = dml_ceil(4 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1) / 4; + Tsw_oto = Lsw_oto * LineTime; + + prefetch_bw_oto = (PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC) / Tsw_oto; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML: HTotal: %d\n", myPipe->HTotal); + dml_print("DML: prefetch_bw_oto: %f\n", prefetch_bw_oto); + dml_print("DML: PrefetchSourceLinesY: %f\n", PrefetchSourceLinesY); + dml_print("DML: swath_width_luma_ub: %d\n", swath_width_luma_ub); + dml_print("DML: BytePerPixelY: %d\n", myPipe->BytePerPixelY); + dml_print("DML: Tsw_oto: %f\n", Tsw_oto); +#endif + + if (GPUVMEnable == true) + Tvm_oto = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, Tvm_trips, LineTime / 4.0); + else + Tvm_oto = LineTime / 4.0; + + if ((GPUVMEnable == true || myPipe->DCCEnable == true)) { + Tr0_oto = dml_max4((MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, Tr0_trips, // PREVIOUS_ERROR (missing this term) + LineTime - Tvm_oto, + LineTime / 4); + } else { + Tr0_oto = (LineTime - Tvm_oto) / 2.0; + } + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips); + dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips); + dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, MetaRowByte); + dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte); + dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow); + dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); + dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto); + dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto); + dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto); +#endif + + Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0; + Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0; + dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto; + dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal); + dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0; + Tpre_rounded = dst_y_prefetch_equ * LineTime; + + dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor); + + if (prefetch_sw_bytes < dep_bytes) + prefetch_sw_bytes = 2 * dep_bytes; + + dml_print("DML: dst_y_prefetch_oto: %f\n", dst_y_prefetch_oto); + dml_print("DML: Tvm_oto_lines: %f\n", Tvm_oto_lines); + dml_print("DML: Tr0_oto_lines: %f\n", Tr0_oto_lines); + dml_print("DML: Lsw_oto: %f\n", Lsw_oto); + dml_print("DML: LineTime: %f\n", LineTime); + dml_print("DML: dst_y_prefetch_equ: %f (after round)\n", dst_y_prefetch_equ); + + dml_print("DML: LineTime: %f\n", LineTime); + dml_print("DML: VStartup: %d\n", VStartup); + dml_print("DML: Tvstartup: %fus - time between vstartup and first pixel of active\n", VStartup * LineTime); + dml_print("DML: TSetup: %fus - time from vstartup to vready\n", *TSetup); + dml_print("DML: TCalc: %fus - time for calculations in dchub starting at vready\n", TCalc); + dml_print("DML: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", TWait); + dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf); + dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec); + dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks); + dml_print("DML: Tdmdl_vm: %fus - time for vm stages of dmd \n", *Tdmdl_vm); + dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", *Tdmdl); + dml_print("DML: DSTXAfterScaler: %f pixels - number of pixel clocks pipeline and buffer delay after scaler \n", *DSTXAfterScaler); + dml_print("DML: DSTYAfterScaler: %f lines - number of lines of pipeline and buffer delay after scaler \n", *DSTYAfterScaler); + + *PrefetchBandwidth = 0; + *DestinationLinesToRequestVMInVBlank = 0; + *DestinationLinesToRequestRowInVBlank = 0; + *VRatioPrefetchY = 0; + *VRatioPrefetchC = 0; + *RequiredPrefetchPixDataBWLuma = 0; + if (dst_y_prefetch_equ > 1) { + double PrefetchBandwidth1; + double PrefetchBandwidth2; + double PrefetchBandwidth3; + double PrefetchBandwidth4; + + if (Tpre_rounded - *Tno_bw > 0) { + PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor + + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw); + Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1; + } else { + PrefetchBandwidth1 = 0; + } + + if (VStartup == MaxVStartup && Tsw_est1 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) { + PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) + / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw); + } + + if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0) + PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded); + else + PrefetchBandwidth2 = 0; + + if (Tpre_rounded - Tvm_trips_rounded > 0) { + PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor + + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded); + Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3; + } else { + PrefetchBandwidth3 = 0; + } + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded); + dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded); + dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3); +#endif + if (VStartup == MaxVStartup && Tsw_est3 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded > 0) { + PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) + / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded); + } + + if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0) + PrefetchBandwidth4 = prefetch_sw_bytes / (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded); + else + PrefetchBandwidth4 = 0; + + { + bool Case1OK; + bool Case2OK; + bool Case3OK; + + if (PrefetchBandwidth1 > 0) { + if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 >= Tvm_trips_rounded + && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= Tr0_trips_rounded) { + Case1OK = true; + } else { + Case1OK = false; + } + } else { + Case1OK = false; + } + + if (PrefetchBandwidth2 > 0) { + if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 >= Tvm_trips_rounded + && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth2 < Tr0_trips_rounded) { + Case2OK = true; + } else { + Case2OK = false; + } + } else { + Case2OK = false; + } + + if (PrefetchBandwidth3 > 0) { + if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < Tvm_trips_rounded + && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= Tr0_trips_rounded) { + Case3OK = true; + } else { + Case3OK = false; + } + } else { + Case3OK = false; + } + + if (Case1OK) { + prefetch_bw_equ = PrefetchBandwidth1; + } else if (Case2OK) { + prefetch_bw_equ = PrefetchBandwidth2; + } else if (Case3OK) { + prefetch_bw_equ = PrefetchBandwidth3; + } else { + prefetch_bw_equ = PrefetchBandwidth4; + } + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK); + dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK); + dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK); + dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ); +#endif + + if (prefetch_bw_equ > 0) { + if (GPUVMEnable == true) { + Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_equ, Tvm_trips, LineTime / 4); + } else { + Tvm_equ = LineTime / 4; + } + + if ((GPUVMEnable == true || myPipe->DCCEnable == true)) { + Tr0_equ = dml_max4( + (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_equ, + Tr0_trips, + (LineTime - Tvm_equ) / 2, + LineTime / 4); + } else { + Tr0_equ = (LineTime - Tvm_equ) / 2; + } + } else { + Tvm_equ = 0; + Tr0_equ = 0; + dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__); + } + } + + if (dst_y_prefetch_oto < dst_y_prefetch_equ) { + *DestinationLinesForPrefetch = dst_y_prefetch_oto; + TimeForFetchingMetaPTE = Tvm_oto; + TimeForFetchingRowInVBlank = Tr0_oto; + *PrefetchBandwidth = prefetch_bw_oto; + } else { + *DestinationLinesForPrefetch = dst_y_prefetch_equ; + TimeForFetchingMetaPTE = Tvm_equ; + TimeForFetchingRowInVBlank = Tr0_equ; + *PrefetchBandwidth = prefetch_bw_equ; + } + + *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0; + + *DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0; + +#ifdef __DML_VBA_ALLOW_DELTA__ + LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch + // See note above dated 5/30/2018 + // - ((NumberOfCursors > 0 || GPUVMEnable || DCCEnable) ? + - ((GPUVMEnable || myPipe->DCCEnable) ? (*DestinationLinesToRequestVMInVBlank + 2 * *DestinationLinesToRequestRowInVBlank) : 0.0); // TODO: Did someone else add this?? +#else + LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank; +#endif + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch); + dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank); + dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank); + dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); + dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank); + dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); + dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData); +#endif + + if (LinesToRequestPrefetchPixelData > 0 && prefetch_bw_equ > 0) { + + *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData; + *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY); + dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY); + dml_print("DML::%s: VInitPreFillY = %f\n", __func__, VInitPreFillY); +#endif + if ((SwathHeightY > 4) && (VInitPreFillY > 3)) { + if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) { + *VRatioPrefetchY = dml_max( + (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData, + (double) MaxNumSwathY * SwathHeightY / (LinesToRequestPrefetchPixelData - (VInitPreFillY - 3.0) / 2.0)); + *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); + } else { + MyError = true; + dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); + *VRatioPrefetchY = 0; + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY); + dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); + dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY); +#endif + } + + *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData; + *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC); + dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC); + dml_print("DML::%s: VInitPreFillC = %f\n", __func__, VInitPreFillC); +#endif + if ((SwathHeightC > 4) || VInitPreFillC > 3) { + if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) { + *VRatioPrefetchC = dml_max( + *VRatioPrefetchC, + (double) MaxNumSwathC * SwathHeightC / (LinesToRequestPrefetchPixelData - (VInitPreFillC - 3.0) / 2.0)); + *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); + } else { + MyError = true; + dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); + *VRatioPrefetchC = 0; + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC); + dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC); + dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC); +#endif + } + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY); + dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub); + dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); +#endif + + *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub / LineTime; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", __func__, *RequiredPrefetchPixDataBWLuma); +#endif + + *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelC * swath_width_chroma_ub + / LineTime; + } else { + MyError = true; + dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); + dml_print("DML: LinesToRequestPrefetchPixelData: %f, should be > 0\n", LinesToRequestPrefetchPixelData); + *VRatioPrefetchY = 0; + *VRatioPrefetchC = 0; + *RequiredPrefetchPixDataBWLuma = 0; + *RequiredPrefetchPixDataBWChroma = 0; + } + + dml_print( + "DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n", + (double) LinesToRequestPrefetchPixelData * LineTime + 2.0 * TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE); + dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE); + dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank); + dml_print( + "DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n", + (double) LinesToRequestPrefetchPixelData * LineTime); + dml_print("DML: To: %fus - time for propagation from scaler to optc\n", + (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / + (double) myPipe->HTotal)) * LineTime); + dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n"); + dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", + VStartup * LineTime - TimeForFetchingMetaPTE - 2 * TimeForFetchingRowInVBlank + - (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup); + dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow); + + } else { + MyError = true; + dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); + } + + { + double prefetch_vm_bw; + double prefetch_row_bw; + + if (PDEAndMetaPTEBytesFrame == 0) { + prefetch_vm_bw = 0; + } else if (*DestinationLinesToRequestVMInVBlank > 0) { +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); + dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); + dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank); + dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); +#endif + prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInVBlank * LineTime); +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw); +#endif + } else { + prefetch_vm_bw = 0; + MyError = true; + dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); + } + + if (MetaRowByte + PixelPTEBytesPerRow == 0) { + prefetch_row_bw = 0; + } else if (*DestinationLinesToRequestRowInVBlank > 0) { + prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInVBlank * LineTime); + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte); + dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow); + dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank); + dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw); +#endif + } else { + prefetch_row_bw = 0; + MyError = true; + dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); + } + + *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw); + } + + if (MyError) { + *PrefetchBandwidth = 0; + TimeForFetchingMetaPTE = 0; + TimeForFetchingRowInVBlank = 0; + *DestinationLinesToRequestVMInVBlank = 0; + *DestinationLinesToRequestRowInVBlank = 0; + *DestinationLinesForPrefetch = 0; + LinesToRequestPrefetchPixelData = 0; + *VRatioPrefetchY = 0; + *VRatioPrefetchC = 0; + *RequiredPrefetchPixDataBWLuma = 0; + *RequiredPrefetchPixDataBWChroma = 0; + } + + return MyError; +} + +static double RoundToDFSGranularityUp(double Clock, double VCOSpeed) +{ + return VCOSpeed * 4 / dml_floor(VCOSpeed * 4 / Clock, 1); +} + +static double RoundToDFSGranularityDown(double Clock, double VCOSpeed) +{ + return VCOSpeed * 4 / dml_ceil(VCOSpeed * 4.0 / Clock, 1); +} + +static void CalculateDCCConfiguration( + bool DCCEnabled, + bool DCCProgrammingAssumesScanDirectionUnknown, + enum source_format_class SourcePixelFormat, + unsigned int SurfaceWidthLuma, + unsigned int SurfaceWidthChroma, + unsigned int SurfaceHeightLuma, + unsigned int SurfaceHeightChroma, + double DETBufferSize, + unsigned int RequestHeight256ByteLuma, + unsigned int RequestHeight256ByteChroma, + enum dm_swizzle_mode TilingFormat, + unsigned int BytePerPixelY, + unsigned int BytePerPixelC, + double BytePerPixelDETY, + double BytePerPixelDETC, + enum scan_direction_class ScanOrientation, + unsigned int *MaxUncompressedBlockLuma, + unsigned int *MaxUncompressedBlockChroma, + unsigned int *MaxCompressedBlockLuma, + unsigned int *MaxCompressedBlockChroma, + unsigned int *IndependentBlockLuma, + unsigned int *IndependentBlockChroma) +{ + int yuv420; + int horz_div_l; + int horz_div_c; + int vert_div_l; + int vert_div_c; + + int swath_buf_size; + double detile_buf_vp_horz_limit; + double detile_buf_vp_vert_limit; + + int MAS_vp_horz_limit; + int MAS_vp_vert_limit; + int max_vp_horz_width; + int max_vp_vert_height; + int eff_surf_width_l; + int eff_surf_width_c; + int eff_surf_height_l; + int eff_surf_height_c; + + int full_swath_bytes_horz_wc_l; + int full_swath_bytes_horz_wc_c; + int full_swath_bytes_vert_wc_l; + int full_swath_bytes_vert_wc_c; + int req128_horz_wc_l; + int req128_horz_wc_c; + int req128_vert_wc_l; + int req128_vert_wc_c; + int segment_order_horz_contiguous_luma; + int segment_order_horz_contiguous_chroma; + int segment_order_vert_contiguous_luma; + int segment_order_vert_contiguous_chroma; + + typedef enum { + REQ_256Bytes, REQ_128BytesNonContiguous, REQ_128BytesContiguous, REQ_NA + } RequestType; + RequestType RequestLuma; + RequestType RequestChroma; + + yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12) ? 1 : 0); + horz_div_l = 1; + horz_div_c = 1; + vert_div_l = 1; + vert_div_c = 1; + + if (BytePerPixelY == 1) + vert_div_l = 0; + if (BytePerPixelC == 1) + vert_div_c = 0; + if (BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x)) + horz_div_l = 0; + if (BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x)) + horz_div_c = 0; + + if (BytePerPixelC == 0) { + swath_buf_size = DETBufferSize / 2 - 2 * 256; + detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l)); + detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l)); + } else { + swath_buf_size = DETBufferSize / 2 - 2 * 2 * 256; + detile_buf_vp_horz_limit = (double) swath_buf_size + / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l) + + (double) RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / (1 + yuv420)); + detile_buf_vp_vert_limit = (double) swath_buf_size + / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / (1 + yuv420)); + } + + if (SourcePixelFormat == dm_420_10) { + detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit; + detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit; + } + + detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16); + detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16); + + MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 5760; + MAS_vp_vert_limit = (BytePerPixelC > 0 ? 2880 : 5760); + max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit); + max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit); + eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma); + eff_surf_width_c = eff_surf_width_l / (1 + yuv420); + eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma); + eff_surf_height_c = eff_surf_height_l / (1 + yuv420); + + full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY; + full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma; + if (BytePerPixelC > 0) { + full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC; + full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma; + } else { + full_swath_bytes_horz_wc_c = 0; + full_swath_bytes_vert_wc_c = 0; + } + + if (SourcePixelFormat == dm_420_10) { + full_swath_bytes_horz_wc_l = dml_ceil(full_swath_bytes_horz_wc_l * 2 / 3, 256); + full_swath_bytes_horz_wc_c = dml_ceil(full_swath_bytes_horz_wc_c * 2 / 3, 256); + full_swath_bytes_vert_wc_l = dml_ceil(full_swath_bytes_vert_wc_l * 2 / 3, 256); + full_swath_bytes_vert_wc_c = dml_ceil(full_swath_bytes_vert_wc_c * 2 / 3, 256); + } + + if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) { + req128_horz_wc_l = 0; + req128_horz_wc_c = 0; + } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c <= DETBufferSize) { + req128_horz_wc_l = 0; + req128_horz_wc_c = 1; + } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) { + req128_horz_wc_l = 1; + req128_horz_wc_c = 0; + } else { + req128_horz_wc_l = 1; + req128_horz_wc_c = 1; + } + + if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) { + req128_vert_wc_l = 0; + req128_vert_wc_c = 0; + } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSize) { + req128_vert_wc_l = 0; + req128_vert_wc_c = 1; + } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) { + req128_vert_wc_l = 1; + req128_vert_wc_c = 0; + } else { + req128_vert_wc_l = 1; + req128_vert_wc_c = 1; + } + + if (BytePerPixelY == 2 || (BytePerPixelY == 4 && TilingFormat != dm_sw_64kb_r_x)) { + segment_order_horz_contiguous_luma = 0; + } else { + segment_order_horz_contiguous_luma = 1; + } + if ((BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x)) + || (BytePerPixelY == 4 && TilingFormat == dm_sw_64kb_r_x)) { + segment_order_vert_contiguous_luma = 0; + } else { + segment_order_vert_contiguous_luma = 1; + } + if (BytePerPixelC == 2 || (BytePerPixelC == 4 && TilingFormat != dm_sw_64kb_r_x)) { + segment_order_horz_contiguous_chroma = 0; + } else { + segment_order_horz_contiguous_chroma = 1; + } + if ((BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x)) + || (BytePerPixelC == 4 && TilingFormat == dm_sw_64kb_r_x)) { + segment_order_vert_contiguous_chroma = 0; + } else { + segment_order_vert_contiguous_chroma = 1; + } + + if (DCCProgrammingAssumesScanDirectionUnknown == true) { + if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) { + RequestLuma = REQ_256Bytes; + } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) { + RequestLuma = REQ_128BytesNonContiguous; + } else { + RequestLuma = REQ_128BytesContiguous; + } + if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) { + RequestChroma = REQ_256Bytes; + } else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) { + RequestChroma = REQ_128BytesNonContiguous; + } else { + RequestChroma = REQ_128BytesContiguous; + } + } else if (ScanOrientation != dm_vert) { + if (req128_horz_wc_l == 0) { + RequestLuma = REQ_256Bytes; + } else if (segment_order_horz_contiguous_luma == 0) { + RequestLuma = REQ_128BytesNonContiguous; + } else { + RequestLuma = REQ_128BytesContiguous; + } + if (req128_horz_wc_c == 0) { + RequestChroma = REQ_256Bytes; + } else if (segment_order_horz_contiguous_chroma == 0) { + RequestChroma = REQ_128BytesNonContiguous; + } else { + RequestChroma = REQ_128BytesContiguous; + } + } else { + if (req128_vert_wc_l == 0) { + RequestLuma = REQ_256Bytes; + } else if (segment_order_vert_contiguous_luma == 0) { + RequestLuma = REQ_128BytesNonContiguous; + } else { + RequestLuma = REQ_128BytesContiguous; + } + if (req128_vert_wc_c == 0) { + RequestChroma = REQ_256Bytes; + } else if (segment_order_vert_contiguous_chroma == 0) { + RequestChroma = REQ_128BytesNonContiguous; + } else { + RequestChroma = REQ_128BytesContiguous; + } + } + + if (RequestLuma == REQ_256Bytes) { + *MaxUncompressedBlockLuma = 256; + *MaxCompressedBlockLuma = 256; + *IndependentBlockLuma = 0; + } else if (RequestLuma == REQ_128BytesContiguous) { + *MaxUncompressedBlockLuma = 256; + *MaxCompressedBlockLuma = 128; + *IndependentBlockLuma = 128; + } else { + *MaxUncompressedBlockLuma = 256; + *MaxCompressedBlockLuma = 64; + *IndependentBlockLuma = 64; + } + + if (RequestChroma == REQ_256Bytes) { + *MaxUncompressedBlockChroma = 256; + *MaxCompressedBlockChroma = 256; + *IndependentBlockChroma = 0; + } else if (RequestChroma == REQ_128BytesContiguous) { + *MaxUncompressedBlockChroma = 256; + *MaxCompressedBlockChroma = 128; + *IndependentBlockChroma = 128; + } else { + *MaxUncompressedBlockChroma = 256; + *MaxCompressedBlockChroma = 64; + *IndependentBlockChroma = 64; + } + + if (DCCEnabled != true || BytePerPixelC == 0) { + *MaxUncompressedBlockChroma = 0; + *MaxCompressedBlockChroma = 0; + *IndependentBlockChroma = 0; + } + + if (DCCEnabled != true) { + *MaxUncompressedBlockLuma = 0; + *MaxCompressedBlockLuma = 0; + *IndependentBlockLuma = 0; + } +} + +static double CalculatePrefetchSourceLines( + struct display_mode_lib *mode_lib, + double VRatio, + double vtaps, + bool Interlace, + bool ProgressiveToInterlaceUnitInOPP, + unsigned int SwathHeight, + unsigned int ViewportYStart, + double *VInitPreFill, + unsigned int *MaxNumSwath) +{ + struct vba_vars_st *v = &mode_lib->vba; + unsigned int MaxPartialSwath; + + if (ProgressiveToInterlaceUnitInOPP) + *VInitPreFill = dml_floor((VRatio + vtaps + 1) / 2.0, 1); + else + *VInitPreFill = dml_floor((VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1); + + if (!v->IgnoreViewportPositioning) { + + *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1.0; + + if (*VInitPreFill > 1.0) + MaxPartialSwath = (unsigned int) (*VInitPreFill - 2) % SwathHeight; + else + MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight; + MaxPartialSwath = dml_max(1U, MaxPartialSwath); + + } else { + + if (ViewportYStart != 0) + dml_print("WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n"); + + *MaxNumSwath = dml_ceil(*VInitPreFill / SwathHeight, 1); + + if (*VInitPreFill > 1.0) + MaxPartialSwath = (unsigned int) (*VInitPreFill - 1) % SwathHeight; + else + MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 1) % SwathHeight; + } + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: VRatio = %f\n", __func__, VRatio); + dml_print("DML::%s: vtaps = %f\n", __func__, vtaps); + dml_print("DML::%s: VInitPreFill = %f\n", __func__, *VInitPreFill); + dml_print("DML::%s: ProgressiveToInterlaceUnitInOPP = %d\n", __func__, ProgressiveToInterlaceUnitInOPP); + dml_print("DML::%s: IgnoreViewportPositioning = %d\n", __func__, v->IgnoreViewportPositioning); + dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight); + dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath); + dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath); + dml_print("DML::%s: Prefetch source lines = %d\n", __func__, *MaxNumSwath * SwathHeight + MaxPartialSwath); +#endif + return *MaxNumSwath * SwathHeight + MaxPartialSwath; +} + +static unsigned int CalculateVMAndRowBytes( + struct display_mode_lib *mode_lib, + bool DCCEnable, + unsigned int BlockHeight256Bytes, + unsigned int BlockWidth256Bytes, + enum source_format_class SourcePixelFormat, + unsigned int SurfaceTiling, + unsigned int BytePerPixel, + enum scan_direction_class ScanDirection, + unsigned int SwathWidth, + unsigned int ViewportHeight, + bool GPUVMEnable, + bool HostVMEnable, + unsigned int HostVMMaxNonCachedPageTableLevels, + unsigned int GPUVMMinPageSize, + unsigned int HostVMMinPageSize, + unsigned int PTEBufferSizeInRequests, + unsigned int Pitch, + unsigned int DCCMetaPitch, + unsigned int *MacroTileWidth, + unsigned int *MetaRowByte, + unsigned int *PixelPTEBytesPerRow, + bool *PTEBufferSizeNotExceeded, + int *dpte_row_width_ub, + unsigned int *dpte_row_height, + unsigned int *MetaRequestWidth, + unsigned int *MetaRequestHeight, + unsigned int *meta_row_width, + unsigned int *meta_row_height, + int *vm_group_bytes, + unsigned int *dpte_group_bytes, + unsigned int *PixelPTEReqWidth, + unsigned int *PixelPTEReqHeight, + unsigned int *PTERequestSize, + int *DPDE0BytesFrame, + int *MetaPTEBytesFrame) +{ + struct vba_vars_st *v = &mode_lib->vba; + unsigned int MPDEBytesFrame; + unsigned int DCCMetaSurfaceBytes; + unsigned int MacroTileSizeBytes; + unsigned int MacroTileHeight; + unsigned int ExtraDPDEBytesFrame; + unsigned int PDEAndMetaPTEBytesFrame; + unsigned int PixelPTEReqHeightPTEs = 0; + unsigned int HostVMDynamicLevels = 0; + double FractionOfPTEReturnDrop; + + if (GPUVMEnable == true && HostVMEnable == true) { + if (HostVMMinPageSize < 2048) { + HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels; + } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) { + HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1); + } else { + HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2); + } + } + + *MetaRequestHeight = 8 * BlockHeight256Bytes; + *MetaRequestWidth = 8 * BlockWidth256Bytes; + if (ScanDirection != dm_vert) { + *meta_row_height = *MetaRequestHeight; + *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth; + *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0; + } else { + *meta_row_height = *MetaRequestWidth; + *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight; + *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0; + } + DCCMetaSurfaceBytes = DCCMetaPitch * (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes) * BytePerPixel / 256; + if (GPUVMEnable == true) { + *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / (8 * 4.0 * 1024), 1) + 1) * 64; + MPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 1); + } else { + *MetaPTEBytesFrame = 0; + MPDEBytesFrame = 0; + } + + if (DCCEnable != true) { + *MetaPTEBytesFrame = 0; + MPDEBytesFrame = 0; + *MetaRowByte = 0; + } + + if (SurfaceTiling == dm_sw_linear) { + MacroTileSizeBytes = 256; + MacroTileHeight = BlockHeight256Bytes; + } else { + MacroTileSizeBytes = 65536; + MacroTileHeight = 16 * BlockHeight256Bytes; + } + *MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight; + + if (GPUVMEnable == true && v->GPUVMMaxPageTableLevels > 1) { + if (ScanDirection != dm_vert) { + *DPDE0BytesFrame = 64 + * (dml_ceil( + ((Pitch * (dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes) + / (8 * 2097152), + 1) + 1); + } else { + *DPDE0BytesFrame = 64 + * (dml_ceil( + ((Pitch * (dml_ceil((double) SwathWidth - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes) + / (8 * 2097152), + 1) + 1); + } + ExtraDPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 2); + } else { + *DPDE0BytesFrame = 0; + ExtraDPDEBytesFrame = 0; + } + + PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame); + dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame); + dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame); + dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame); + dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); +#endif + + if (HostVMEnable == true) { + PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels); + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); +#endif + + if (SurfaceTiling == dm_sw_linear) { + PixelPTEReqHeightPTEs = 1; + *PixelPTEReqHeight = 1; + *PixelPTEReqWidth = 32768.0 / BytePerPixel; + *PTERequestSize = 64; + FractionOfPTEReturnDrop = 0; + } else if (MacroTileSizeBytes == 4096) { + PixelPTEReqHeightPTEs = 1; + *PixelPTEReqHeight = MacroTileHeight; + *PixelPTEReqWidth = 8 * *MacroTileWidth; + *PTERequestSize = 64; + if (ScanDirection != dm_vert) + FractionOfPTEReturnDrop = 0; + else + FractionOfPTEReturnDrop = 7 / 8; + } else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) { + PixelPTEReqHeightPTEs = 16; + *PixelPTEReqHeight = 16 * BlockHeight256Bytes; + *PixelPTEReqWidth = 16 * BlockWidth256Bytes; + *PTERequestSize = 128; + FractionOfPTEReturnDrop = 0; + } else { + PixelPTEReqHeightPTEs = 1; + *PixelPTEReqHeight = MacroTileHeight; + *PixelPTEReqWidth = 8 * *MacroTileWidth; + *PTERequestSize = 64; + FractionOfPTEReturnDrop = 0; + } + + if (SurfaceTiling == dm_sw_linear) { + *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1)); + *dpte_row_width_ub = (dml_ceil((double)(Pitch * *dpte_row_height - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth; + *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize; + } else if (ScanDirection != dm_vert) { + *dpte_row_height = *PixelPTEReqHeight; + *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth; + *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize; + } else { + *dpte_row_height = dml_min(*PixelPTEReqWidth, *MacroTileWidth); + *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight; + *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize; + } + + if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop) <= 64 * PTEBufferSizeInRequests) { + *PTEBufferSizeNotExceeded = true; + } else { + *PTEBufferSizeNotExceeded = false; + } + + if (GPUVMEnable != true) { + *PixelPTEBytesPerRow = 0; + *PTEBufferSizeNotExceeded = true; + } + + dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", *MetaPTEBytesFrame); + + if (HostVMEnable == true) { + *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels); + } + + if (HostVMEnable == true) { + *vm_group_bytes = 512; + *dpte_group_bytes = 512; + } else if (GPUVMEnable == true) { + *vm_group_bytes = 2048; + if (SurfaceTiling != dm_sw_linear && PixelPTEReqHeightPTEs == 1 && ScanDirection == dm_vert) { + *dpte_group_bytes = 512; + } else { + *dpte_group_bytes = 2048; + } + } else { + *vm_group_bytes = 0; + *dpte_group_bytes = 0; + } + return PDEAndMetaPTEBytesFrame; +} + +static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib) +{ + struct vba_vars_st *v = &mode_lib->vba; + unsigned int j, k; + double HostVMInefficiencyFactor = 1.0; + bool NoChromaPlanes = true; + int ReorderBytes; + double VMDataOnlyReturnBW; + double MaxTotalRDBandwidth = 0; + int PrefetchMode = v->PrefetchModePerState[v->VoltageLevel][v->maxMpcComb]; + + v->WritebackDISPCLK = 0.0; + v->DISPCLKWithRamping = 0; + v->DISPCLKWithoutRamping = 0; + v->GlobalDPPCLK = 0.0; + /* DAL custom code: need to update ReturnBW in case min dcfclk is overriden */ + { + double IdealFabricAndSDPPortBandwidthPerState = dml_min( + v->ReturnBusWidth * v->DCFCLKState[v->VoltageLevel][v->maxMpcComb], + v->FabricClockPerState[v->VoltageLevel] * v->FabricDatapathToDCNDataReturn); + double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[v->VoltageLevel] * v->NumberOfChannels * v->DRAMChannelWidth; + if (v->HostVMEnable != true) { + v->ReturnBW = dml_min( + IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, + IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0); + } else { + v->ReturnBW = dml_min( + IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, + IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0); + } + } + /* End DAL custom code */ + + // DISPCLK and DPPCLK Calculation + // + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + if (v->WritebackEnable[k]) { + v->WritebackDISPCLK = dml_max( + v->WritebackDISPCLK, + dml31_CalculateWriteBackDISPCLK( + v->WritebackPixelFormat[k], + v->PixelClock[k], + v->WritebackHRatio[k], + v->WritebackVRatio[k], + v->WritebackHTaps[k], + v->WritebackVTaps[k], + v->WritebackSourceWidth[k], + v->WritebackDestinationWidth[k], + v->HTotal[k], + v->WritebackLineBufferSize)); + } + } + + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + if (v->HRatio[k] > 1) { + v->PSCL_THROUGHPUT_LUMA[k] = dml_min( + v->MaxDCHUBToPSCLThroughput, + v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1)); + } else { + v->PSCL_THROUGHPUT_LUMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput); + } + + v->DPPCLKUsingSingleDPPLuma = v->PixelClock[k] + * dml_max( + v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]), + dml_max(v->HRatio[k] * v->VRatio[k] / v->PSCL_THROUGHPUT_LUMA[k], 1.0)); + + if ((v->htaps[k] > 6 || v->vtaps[k] > 6) && v->DPPCLKUsingSingleDPPLuma < 2 * v->PixelClock[k]) { + v->DPPCLKUsingSingleDPPLuma = 2 * v->PixelClock[k]; + } + + if ((v->SourcePixelFormat[k] != dm_420_8 && v->SourcePixelFormat[k] != dm_420_10 && v->SourcePixelFormat[k] != dm_420_12 + && v->SourcePixelFormat[k] != dm_rgbe_alpha)) { + v->PSCL_THROUGHPUT_CHROMA[k] = 0.0; + v->DPPCLKUsingSingleDPP[k] = v->DPPCLKUsingSingleDPPLuma; + } else { + if (v->HRatioChroma[k] > 1) { + v->PSCL_THROUGHPUT_CHROMA[k] = dml_min( + v->MaxDCHUBToPSCLThroughput, + v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0)); + } else { + v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput); + } + v->DPPCLKUsingSingleDPPChroma = v->PixelClock[k] + * dml_max3( + v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]), + v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_THROUGHPUT_CHROMA[k], + 1.0); + + if ((v->HTAPsChroma[k] > 6 || v->VTAPsChroma[k] > 6) && v->DPPCLKUsingSingleDPPChroma < 2 * v->PixelClock[k]) { + v->DPPCLKUsingSingleDPPChroma = 2 * v->PixelClock[k]; + } + + v->DPPCLKUsingSingleDPP[k] = dml_max(v->DPPCLKUsingSingleDPPLuma, v->DPPCLKUsingSingleDPPChroma); + } + } + + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + if (v->BlendingAndTiming[k] != k) + continue; + if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) { + v->DISPCLKWithRamping = dml_max( + v->DISPCLKWithRamping, + v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) + * (1 + v->DISPCLKRampingMargin / 100)); + v->DISPCLKWithoutRamping = dml_max( + v->DISPCLKWithoutRamping, + v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)); + } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) { + v->DISPCLKWithRamping = dml_max( + v->DISPCLKWithRamping, + v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) + * (1 + v->DISPCLKRampingMargin / 100)); + v->DISPCLKWithoutRamping = dml_max( + v->DISPCLKWithoutRamping, + v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)); + } else { + v->DISPCLKWithRamping = dml_max( + v->DISPCLKWithRamping, + v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) * (1 + v->DISPCLKRampingMargin / 100)); + v->DISPCLKWithoutRamping = dml_max( + v->DISPCLKWithoutRamping, + v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)); + } + } + + v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping, v->WritebackDISPCLK); + v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping, v->WritebackDISPCLK); + + ASSERT(v->DISPCLKDPPCLKVCOSpeed != 0); + v->DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithRamping, v->DISPCLKDPPCLKVCOSpeed); + v->DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithoutRamping, v->DISPCLKDPPCLKVCOSpeed); + v->MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown( + v->soc.clock_limits[v->soc.num_states - 1].dispclk_mhz, + v->DISPCLKDPPCLKVCOSpeed); + if (v->DISPCLKWithoutRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) { + v->DISPCLK_calculated = v->DISPCLKWithoutRampingRoundedToDFSGranularity; + } else if (v->DISPCLKWithRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) { + v->DISPCLK_calculated = v->MaxDispclkRoundedToDFSGranularity; + } else { + v->DISPCLK_calculated = v->DISPCLKWithRampingRoundedToDFSGranularity; + } + v->DISPCLK = v->DISPCLK_calculated; + DTRACE(" dispclk_mhz (calculated) = %f", v->DISPCLK_calculated); + + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + v->DPPCLK_calculated[k] = v->DPPCLKUsingSingleDPP[k] / v->DPPPerPlane[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100); + v->GlobalDPPCLK = dml_max(v->GlobalDPPCLK, v->DPPCLK_calculated[k]); + } + v->GlobalDPPCLK = RoundToDFSGranularityUp(v->GlobalDPPCLK, v->DISPCLKDPPCLKVCOSpeed); + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + v->DPPCLK_calculated[k] = v->GlobalDPPCLK / 255 * dml_ceil(v->DPPCLK_calculated[k] * 255.0 / v->GlobalDPPCLK, 1); + DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, v->DPPCLK_calculated[k]); + } + + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + v->DPPCLK[k] = v->DPPCLK_calculated[k]; + } + + // Urgent and B P-State/DRAM Clock Change Watermark + DTRACE(" dcfclk_mhz = %f", v->DCFCLK); + DTRACE(" return_bus_bw = %f", v->ReturnBW); + + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + dml30_CalculateBytePerPixelAnd256BBlockSizes( + v->SourcePixelFormat[k], + v->SurfaceTiling[k], + &v->BytePerPixelY[k], + &v->BytePerPixelC[k], + &v->BytePerPixelDETY[k], + &v->BytePerPixelDETC[k], + &v->BlockHeight256BytesY[k], + &v->BlockHeight256BytesC[k], + &v->BlockWidth256BytesY[k], + &v->BlockWidth256BytesC[k]); + } + + CalculateSwathWidth( + false, + v->NumberOfActivePlanes, + v->SourcePixelFormat, + v->SourceScan, + v->ViewportWidth, + v->ViewportHeight, + v->SurfaceWidthY, + v->SurfaceWidthC, + v->SurfaceHeightY, + v->SurfaceHeightC, + v->ODMCombineEnabled, + v->BytePerPixelY, + v->BytePerPixelC, + v->BlockHeight256BytesY, + v->BlockHeight256BytesC, + v->BlockWidth256BytesY, + v->BlockWidth256BytesC, + v->BlendingAndTiming, + v->HActive, + v->HRatio, + v->DPPPerPlane, + v->SwathWidthSingleDPPY, + v->SwathWidthSingleDPPC, + v->SwathWidthY, + v->SwathWidthC, + v->dummyinteger3, + v->dummyinteger4, + v->swath_width_luma_ub, + v->swath_width_chroma_ub); + + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + v->ReadBandwidthPlaneLuma[k] = v->SwathWidthSingleDPPY[k] * v->BytePerPixelY[k] / (v->HTotal[k] / v->PixelClock[k]) + * v->VRatio[k]; + v->ReadBandwidthPlaneChroma[k] = v->SwathWidthSingleDPPC[k] * v->BytePerPixelC[k] / (v->HTotal[k] / v->PixelClock[k]) + * v->VRatioChroma[k]; + DTRACE(" read_bw[%i] = %fBps", k, v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]); + } + + // DCFCLK Deep Sleep + CalculateDCFCLKDeepSleep( + mode_lib, + v->NumberOfActivePlanes, + v->BytePerPixelY, + v->BytePerPixelC, + v->VRatio, + v->VRatioChroma, + v->SwathWidthY, + v->SwathWidthC, + v->DPPPerPlane, + v->HRatio, + v->HRatioChroma, + v->PixelClock, + v->PSCL_THROUGHPUT_LUMA, + v->PSCL_THROUGHPUT_CHROMA, + v->DPPCLK, + v->ReadBandwidthPlaneLuma, + v->ReadBandwidthPlaneChroma, + v->ReturnBusWidth, + &v->DCFCLKDeepSleep); + + // DSCCLK + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + if ((v->BlendingAndTiming[k] != k) || !v->DSCEnabled[k]) { + v->DSCCLK_calculated[k] = 0.0; + } else { + if (v->OutputFormat[k] == dm_420) + v->DSCFormatFactor = 2; + else if (v->OutputFormat[k] == dm_444) + v->DSCFormatFactor = 1; + else if (v->OutputFormat[k] == dm_n422) + v->DSCFormatFactor = 2; + else + v->DSCFormatFactor = 1; + if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) + v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 12 / v->DSCFormatFactor + / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100); + else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) + v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 6 / v->DSCFormatFactor + / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100); + else + v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 3 / v->DSCFormatFactor + / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100); + } + } + + // DSC Delay + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + double BPP = v->OutputBpp[k]; + + if (v->DSCEnabled[k] && BPP != 0) { + if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_disabled) { + v->DSCDelay[k] = dscceComputeDelay( + v->DSCInputBitPerComponent[k], + BPP, + dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1), + v->NumberOfDSCSlices[k], + v->OutputFormat[k], + v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]); + } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) { + v->DSCDelay[k] = 2 + * (dscceComputeDelay( + v->DSCInputBitPerComponent[k], + BPP, + dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1), + v->NumberOfDSCSlices[k] / 2.0, + v->OutputFormat[k], + v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k])); + } else { + v->DSCDelay[k] = 4 + * (dscceComputeDelay( + v->DSCInputBitPerComponent[k], + BPP, + dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1), + v->NumberOfDSCSlices[k] / 4.0, + v->OutputFormat[k], + v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k])); + } + v->DSCDelay[k] = v->DSCDelay[k] * v->PixelClock[k] / v->PixelClockBackEnd[k]; + } else { + v->DSCDelay[k] = 0; + } + } + + for (k = 0; k < v->NumberOfActivePlanes; ++k) + for (j = 0; j < v->NumberOfActivePlanes; ++j) // NumberOfPlanes + if (j != k && v->BlendingAndTiming[k] == j && v->DSCEnabled[j]) + v->DSCDelay[k] = v->DSCDelay[j]; + + // Prefetch + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + unsigned int PDEAndMetaPTEBytesFrameY; + unsigned int PixelPTEBytesPerRowY; + unsigned int MetaRowByteY; + unsigned int MetaRowByteC; + unsigned int PDEAndMetaPTEBytesFrameC; + unsigned int PixelPTEBytesPerRowC; + bool PTEBufferSizeNotExceededY; + bool PTEBufferSizeNotExceededC; + + if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12 + || v->SourcePixelFormat[k] == dm_rgbe_alpha) { + if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) { + v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2; + v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma; + } else { + v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma; + v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma; + } + + PDEAndMetaPTEBytesFrameC = CalculateVMAndRowBytes( + mode_lib, + v->DCCEnable[k], + v->BlockHeight256BytesC[k], + v->BlockWidth256BytesC[k], + v->SourcePixelFormat[k], + v->SurfaceTiling[k], + v->BytePerPixelC[k], + v->SourceScan[k], + v->SwathWidthC[k], + v->ViewportHeightChroma[k], + v->GPUVMEnable, + v->HostVMEnable, + v->HostVMMaxNonCachedPageTableLevels, + v->GPUVMMinPageSize, + v->HostVMMinPageSize, + v->PTEBufferSizeInRequestsForChroma, + v->PitchC[k], + v->DCCMetaPitchC[k], + &v->MacroTileWidthC[k], + &MetaRowByteC, + &PixelPTEBytesPerRowC, + &PTEBufferSizeNotExceededC, + &v->dpte_row_width_chroma_ub[k], + &v->dpte_row_height_chroma[k], + &v->meta_req_width_chroma[k], + &v->meta_req_height_chroma[k], + &v->meta_row_width_chroma[k], + &v->meta_row_height_chroma[k], + &v->dummyinteger1, + &v->dummyinteger2, + &v->PixelPTEReqWidthC[k], + &v->PixelPTEReqHeightC[k], + &v->PTERequestSizeC[k], + &v->dpde0_bytes_per_frame_ub_c[k], + &v->meta_pte_bytes_per_frame_ub_c[k]); + + v->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines( + mode_lib, + v->VRatioChroma[k], + v->VTAPsChroma[k], + v->Interlace[k], + v->ProgressiveToInterlaceUnitInOPP, + v->SwathHeightC[k], + v->ViewportYStartC[k], + &v->VInitPreFillC[k], + &v->MaxNumSwathC[k]); + } else { + v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma; + v->PTEBufferSizeInRequestsForChroma = 0; + PixelPTEBytesPerRowC = 0; + PDEAndMetaPTEBytesFrameC = 0; + MetaRowByteC = 0; + v->MaxNumSwathC[k] = 0; + v->PrefetchSourceLinesC[k] = 0; + } + + PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes( + mode_lib, + v->DCCEnable[k], + v->BlockHeight256BytesY[k], + v->BlockWidth256BytesY[k], + v->SourcePixelFormat[k], + v->SurfaceTiling[k], + v->BytePerPixelY[k], + v->SourceScan[k], + v->SwathWidthY[k], + v->ViewportHeight[k], + v->GPUVMEnable, + v->HostVMEnable, + v->HostVMMaxNonCachedPageTableLevels, + v->GPUVMMinPageSize, + v->HostVMMinPageSize, + v->PTEBufferSizeInRequestsForLuma, + v->PitchY[k], + v->DCCMetaPitchY[k], + &v->MacroTileWidthY[k], + &MetaRowByteY, + &PixelPTEBytesPerRowY, + &PTEBufferSizeNotExceededY, + &v->dpte_row_width_luma_ub[k], + &v->dpte_row_height[k], + &v->meta_req_width[k], + &v->meta_req_height[k], + &v->meta_row_width[k], + &v->meta_row_height[k], + &v->vm_group_bytes[k], + &v->dpte_group_bytes[k], + &v->PixelPTEReqWidthY[k], + &v->PixelPTEReqHeightY[k], + &v->PTERequestSizeY[k], + &v->dpde0_bytes_per_frame_ub_l[k], + &v->meta_pte_bytes_per_frame_ub_l[k]); + + v->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines( + mode_lib, + v->VRatio[k], + v->vtaps[k], + v->Interlace[k], + v->ProgressiveToInterlaceUnitInOPP, + v->SwathHeightY[k], + v->ViewportYStartY[k], + &v->VInitPreFillY[k], + &v->MaxNumSwathY[k]); + v->PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC; + v->PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC; + v->MetaRowByte[k] = MetaRowByteY + MetaRowByteC; + + CalculateRowBandwidth( + v->GPUVMEnable, + v->SourcePixelFormat[k], + v->VRatio[k], + v->VRatioChroma[k], + v->DCCEnable[k], + v->HTotal[k] / v->PixelClock[k], + MetaRowByteY, + MetaRowByteC, + v->meta_row_height[k], + v->meta_row_height_chroma[k], + PixelPTEBytesPerRowY, + PixelPTEBytesPerRowC, + v->dpte_row_height[k], + v->dpte_row_height_chroma[k], + &v->meta_row_bw[k], + &v->dpte_row_bw[k]); + } + + v->TotalDCCActiveDPP = 0; + v->TotalActiveDPP = 0; + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + v->TotalActiveDPP = v->TotalActiveDPP + v->DPPPerPlane[k]; + if (v->DCCEnable[k]) + v->TotalDCCActiveDPP = v->TotalDCCActiveDPP + v->DPPPerPlane[k]; + if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12 + || v->SourcePixelFormat[k] == dm_rgbe_alpha) + NoChromaPlanes = false; + } + + ReorderBytes = v->NumberOfChannels + * dml_max3( + v->UrgentOutOfOrderReturnPerChannelPixelDataOnly, + v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData, + v->UrgentOutOfOrderReturnPerChannelVMDataOnly); + + VMDataOnlyReturnBW = dml_min( + dml_min(v->ReturnBusWidth * v->DCFCLK, v->FabricClock * v->FabricDatapathToDCNDataReturn) + * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, + v->DRAMSpeed * v->NumberOfChannels * v->DRAMChannelWidth + * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0); + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: v->ReturnBusWidth = %f\n", __func__, v->ReturnBusWidth); + dml_print("DML::%s: v->DCFCLK = %f\n", __func__, v->DCFCLK); + dml_print("DML::%s: v->FabricClock = %f\n", __func__, v->FabricClock); + dml_print("DML::%s: v->FabricDatapathToDCNDataReturn = %f\n", __func__, v->FabricDatapathToDCNDataReturn); + dml_print("DML::%s: v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency = %f\n", __func__, v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency); + dml_print("DML::%s: v->DRAMSpeed = %f\n", __func__, v->DRAMSpeed); + dml_print("DML::%s: v->NumberOfChannels = %f\n", __func__, v->NumberOfChannels); + dml_print("DML::%s: v->DRAMChannelWidth = %f\n", __func__, v->DRAMChannelWidth); + dml_print("DML::%s: v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly = %f\n", __func__, v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly); + dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW); + dml_print("DML::%s: ReturnBW = %f\n", __func__, v->ReturnBW); +#endif + + if (v->GPUVMEnable && v->HostVMEnable) + HostVMInefficiencyFactor = v->ReturnBW / VMDataOnlyReturnBW; + + v->UrgentExtraLatency = CalculateExtraLatency( + v->RoundTripPingLatencyCycles, + ReorderBytes, + v->DCFCLK, + v->TotalActiveDPP, + v->PixelChunkSizeInKByte, + v->TotalDCCActiveDPP, + v->MetaChunkSize, + v->ReturnBW, + v->GPUVMEnable, + v->HostVMEnable, + v->NumberOfActivePlanes, + v->DPPPerPlane, + v->dpte_group_bytes, + HostVMInefficiencyFactor, + v->HostVMMinPageSize, + v->HostVMMaxNonCachedPageTableLevels); + + v->TCalc = 24.0 / v->DCFCLKDeepSleep; + + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + if (v->BlendingAndTiming[k] == k) { + if (v->WritebackEnable[k] == true) { + v->WritebackDelay[v->VoltageLevel][k] = v->WritebackLatency + + CalculateWriteBackDelay( + v->WritebackPixelFormat[k], + v->WritebackHRatio[k], + v->WritebackVRatio[k], + v->WritebackVTaps[k], + v->WritebackDestinationWidth[k], + v->WritebackDestinationHeight[k], + v->WritebackSourceHeight[k], + v->HTotal[k]) / v->DISPCLK; + } else + v->WritebackDelay[v->VoltageLevel][k] = 0; + for (j = 0; j < v->NumberOfActivePlanes; ++j) { + if (v->BlendingAndTiming[j] == k && v->WritebackEnable[j] == true) { + v->WritebackDelay[v->VoltageLevel][k] = dml_max( + v->WritebackDelay[v->VoltageLevel][k], + v->WritebackLatency + + CalculateWriteBackDelay( + v->WritebackPixelFormat[j], + v->WritebackHRatio[j], + v->WritebackVRatio[j], + v->WritebackVTaps[j], + v->WritebackDestinationWidth[j], + v->WritebackDestinationHeight[j], + v->WritebackSourceHeight[j], + v->HTotal[k]) / v->DISPCLK); + } + } + } + } + + for (k = 0; k < v->NumberOfActivePlanes; ++k) + for (j = 0; j < v->NumberOfActivePlanes; ++j) + if (v->BlendingAndTiming[k] == j) + v->WritebackDelay[v->VoltageLevel][k] = v->WritebackDelay[v->VoltageLevel][j]; + + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + v->MaxVStartupLines[k] = + (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) ? + dml_floor((v->VTotal[k] - v->VActive[k]) / 2.0, 1.0) : + v->VTotal[k] - v->VActive[k] + - dml_max( + 1.0, + dml_ceil( + (double) v->WritebackDelay[v->VoltageLevel][k] + / (v->HTotal[k] / v->PixelClock[k]), + 1)); + if (v->MaxVStartupLines[k] > 1023) + v->MaxVStartupLines[k] = 1023; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%d MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]); + dml_print("DML::%s: k=%d VoltageLevel = %d\n", __func__, k, v->VoltageLevel); + dml_print("DML::%s: k=%d WritebackDelay = %f\n", __func__, k, v->WritebackDelay[v->VoltageLevel][k]); +#endif + } + + v->MaximumMaxVStartupLines = 0; + for (k = 0; k < v->NumberOfActivePlanes; ++k) + v->MaximumMaxVStartupLines = dml_max(v->MaximumMaxVStartupLines, v->MaxVStartupLines[k]); + + // VBA_DELTA + // We don't really care to iterate between the various prefetch modes + //v->PrefetchERROR = CalculateMinAndMaxPrefetchMode(v->AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, &v->MinPrefetchMode, &v->MaxPrefetchMode); + + v->UrgentLatency = CalculateUrgentLatency( + v->UrgentLatencyPixelDataOnly, + v->UrgentLatencyPixelMixedWithVMData, + v->UrgentLatencyVMDataOnly, + v->DoUrgentLatencyAdjustment, + v->UrgentLatencyAdjustmentFabricClockComponent, + v->UrgentLatencyAdjustmentFabricClockReference, + v->FabricClock); + + v->FractionOfUrgentBandwidth = 0.0; + v->FractionOfUrgentBandwidthImmediateFlip = 0.0; + + v->VStartupLines = __DML_VBA_MIN_VSTARTUP__; + + do { + double MaxTotalRDBandwidthNoUrgentBurst = 0.0; + bool DestinationLineTimesForPrefetchLessThan2 = false; + bool VRatioPrefetchMoreThan4 = false; + double TWait = CalculateTWait(PrefetchMode, v->DRAMClockChangeLatency, v->UrgentLatency, v->SREnterPlusExitTime); + MaxTotalRDBandwidth = 0; + + dml_print("DML::%s: Start loop: VStartup = %d\n", __func__, v->VStartupLines); + + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + Pipe myPipe; + + myPipe.DPPCLK = v->DPPCLK[k]; + myPipe.DISPCLK = v->DISPCLK; + myPipe.PixelClock = v->PixelClock[k]; + myPipe.DCFCLKDeepSleep = v->DCFCLKDeepSleep; + myPipe.DPPPerPlane = v->DPPPerPlane[k]; + myPipe.ScalerEnabled = v->ScalerEnabled[k]; + myPipe.VRatio = v->VRatio[k]; + myPipe.VRatioChroma = v->VRatioChroma[k]; + myPipe.SourceScan = v->SourceScan[k]; + myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k]; + myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k]; + myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k]; + myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k]; + myPipe.InterlaceEnable = v->Interlace[k]; + myPipe.NumberOfCursors = v->NumberOfCursors[k]; + myPipe.VBlank = v->VTotal[k] - v->VActive[k]; + myPipe.HTotal = v->HTotal[k]; + myPipe.DCCEnable = v->DCCEnable[k]; + myPipe.ODMCombineIsEnabled = v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1 + || v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1; + myPipe.SourcePixelFormat = v->SourcePixelFormat[k]; + myPipe.BytePerPixelY = v->BytePerPixelY[k]; + myPipe.BytePerPixelC = v->BytePerPixelC[k]; + myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP; + v->ErrorResult[k] = CalculatePrefetchSchedule( + mode_lib, + HostVMInefficiencyFactor, + &myPipe, + v->DSCDelay[k], + v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater, + v->DPPCLKDelaySCL, + v->DPPCLKDelaySCLLBOnly, + v->DPPCLKDelayCNVCCursor, + v->DISPCLKDelaySubtotal, + (unsigned int) (v->SwathWidthY[k] / v->HRatio[k]), + v->OutputFormat[k], + v->MaxInterDCNTileRepeaters, + dml_min(v->VStartupLines, v->MaxVStartupLines[k]), + v->MaxVStartupLines[k], + v->GPUVMMaxPageTableLevels, + v->GPUVMEnable, + v->HostVMEnable, + v->HostVMMaxNonCachedPageTableLevels, + v->HostVMMinPageSize, + v->DynamicMetadataEnable[k], + v->DynamicMetadataVMEnabled, + v->DynamicMetadataLinesBeforeActiveRequired[k], + v->DynamicMetadataTransmittedBytes[k], + v->UrgentLatency, + v->UrgentExtraLatency, + v->TCalc, + v->PDEAndMetaPTEBytesFrame[k], + v->MetaRowByte[k], + v->PixelPTEBytesPerRow[k], + v->PrefetchSourceLinesY[k], + v->SwathWidthY[k], + v->VInitPreFillY[k], + v->MaxNumSwathY[k], + v->PrefetchSourceLinesC[k], + v->SwathWidthC[k], + v->VInitPreFillC[k], + v->MaxNumSwathC[k], + v->swath_width_luma_ub[k], + v->swath_width_chroma_ub[k], + v->SwathHeightY[k], + v->SwathHeightC[k], + TWait, + &v->DSTXAfterScaler[k], + &v->DSTYAfterScaler[k], + &v->DestinationLinesForPrefetch[k], + &v->PrefetchBandwidth[k], + &v->DestinationLinesToRequestVMInVBlank[k], + &v->DestinationLinesToRequestRowInVBlank[k], + &v->VRatioPrefetchY[k], + &v->VRatioPrefetchC[k], + &v->RequiredPrefetchPixDataBWLuma[k], + &v->RequiredPrefetchPixDataBWChroma[k], + &v->NotEnoughTimeForDynamicMetadata[k], + &v->Tno_bw[k], + &v->prefetch_vmrow_bw[k], + &v->Tdmdl_vm[k], + &v->Tdmdl[k], + &v->TSetup[k], + &v->VUpdateOffsetPix[k], + &v->VUpdateWidthPix[k], + &v->VReadyOffsetPix[k]); + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%0d Prefetch cal result=%0d\n", __func__, k, v->ErrorResult[k]); +#endif + v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[k]); + } + + v->NoEnoughUrgentLatencyHiding = false; + v->NoEnoughUrgentLatencyHidingPre = false; + + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 + / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k]; + v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 + / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPrefetchY[k]; + + CalculateUrgentBurstFactor( + v->swath_width_luma_ub[k], + v->swath_width_chroma_ub[k], + v->SwathHeightY[k], + v->SwathHeightC[k], + v->HTotal[k] / v->PixelClock[k], + v->UrgentLatency, + v->CursorBufferSize, + v->CursorWidth[k][0], + v->CursorBPP[k][0], + v->VRatio[k], + v->VRatioChroma[k], + v->BytePerPixelDETY[k], + v->BytePerPixelDETC[k], + v->DETBufferSizeY[k], + v->DETBufferSizeC[k], + &v->UrgBurstFactorCursor[k], + &v->UrgBurstFactorLuma[k], + &v->UrgBurstFactorChroma[k], + &v->NoUrgentLatencyHiding[k]); + + CalculateUrgentBurstFactor( + v->swath_width_luma_ub[k], + v->swath_width_chroma_ub[k], + v->SwathHeightY[k], + v->SwathHeightC[k], + v->HTotal[k] / v->PixelClock[k], + v->UrgentLatency, + v->CursorBufferSize, + v->CursorWidth[k][0], + v->CursorBPP[k][0], + v->VRatioPrefetchY[k], + v->VRatioPrefetchC[k], + v->BytePerPixelDETY[k], + v->BytePerPixelDETC[k], + v->DETBufferSizeY[k], + v->DETBufferSizeC[k], + &v->UrgBurstFactorCursorPre[k], + &v->UrgBurstFactorLumaPre[k], + &v->UrgBurstFactorChromaPre[k], + &v->NoUrgentLatencyHidingPre[k]); + + MaxTotalRDBandwidth = MaxTotalRDBandwidth + + dml_max3( + v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k], + v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k] + + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k] + + v->cursor_bw[k] * v->UrgBurstFactorCursor[k] + + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]), + v->DPPPerPlane[k] + * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k] + + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k]) + + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]); + + MaxTotalRDBandwidthNoUrgentBurst = MaxTotalRDBandwidthNoUrgentBurst + + dml_max3( + v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k], + v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k] + + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]), + v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k]) + + v->cursor_bw_pre[k]); + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%0d DPPPerPlane=%d\n", __func__, k, v->DPPPerPlane[k]); + dml_print("DML::%s: k=%0d UrgBurstFactorLuma=%f\n", __func__, k, v->UrgBurstFactorLuma[k]); + dml_print("DML::%s: k=%0d UrgBurstFactorChroma=%f\n", __func__, k, v->UrgBurstFactorChroma[k]); + dml_print("DML::%s: k=%0d UrgBurstFactorLumaPre=%f\n", __func__, k, v->UrgBurstFactorLumaPre[k]); + dml_print("DML::%s: k=%0d UrgBurstFactorChromaPre=%f\n", __func__, k, v->UrgBurstFactorChromaPre[k]); + + dml_print("DML::%s: k=%0d VRatioPrefetchY=%f\n", __func__, k, v->VRatioPrefetchY[k]); + dml_print("DML::%s: k=%0d VRatioY=%f\n", __func__, k, v->VRatio[k]); + + dml_print("DML::%s: k=%0d prefetch_vmrow_bw=%f\n", __func__, k, v->prefetch_vmrow_bw[k]); + dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma=%f\n", __func__, k, v->ReadBandwidthPlaneLuma[k]); + dml_print("DML::%s: k=%0d ReadBandwidthPlaneChroma=%f\n", __func__, k, v->ReadBandwidthPlaneChroma[k]); + dml_print("DML::%s: k=%0d cursor_bw=%f\n", __func__, k, v->cursor_bw[k]); + dml_print("DML::%s: k=%0d meta_row_bw=%f\n", __func__, k, v->meta_row_bw[k]); + dml_print("DML::%s: k=%0d dpte_row_bw=%f\n", __func__, k, v->dpte_row_bw[k]); + dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWLuma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWLuma[k]); + dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWChroma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWChroma[k]); + dml_print("DML::%s: k=%0d cursor_bw_pre=%f\n", __func__, k, v->cursor_bw_pre[k]); + dml_print("DML::%s: k=%0d MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, k, MaxTotalRDBandwidthNoUrgentBurst); +#endif + + if (v->DestinationLinesForPrefetch[k] < 2) + DestinationLineTimesForPrefetchLessThan2 = true; + + if (v->VRatioPrefetchY[k] > 4 || v->VRatioPrefetchC[k] > 4) + VRatioPrefetchMoreThan4 = true; + + if (v->NoUrgentLatencyHiding[k] == true) + v->NoEnoughUrgentLatencyHiding = true; + + if (v->NoUrgentLatencyHidingPre[k] == true) + v->NoEnoughUrgentLatencyHidingPre = true; + } + + v->FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / v->ReturnBW; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: MaxTotalRDBandwidthNoUrgentBurst=%f \n", __func__, MaxTotalRDBandwidthNoUrgentBurst); + dml_print("DML::%s: ReturnBW=%f \n", __func__, v->ReturnBW); + dml_print("DML::%s: FractionOfUrgentBandwidth=%f \n", __func__, v->FractionOfUrgentBandwidth); +#endif + + if (MaxTotalRDBandwidth <= v->ReturnBW && v->NoEnoughUrgentLatencyHiding == 0 && v->NoEnoughUrgentLatencyHidingPre == 0 + && !VRatioPrefetchMoreThan4 && !DestinationLineTimesForPrefetchLessThan2) + v->PrefetchModeSupported = true; + else { + v->PrefetchModeSupported = false; + dml_print("DML::%s: ***failed***. Bandwidth violation. Results are NOT valid\n", __func__); + dml_print("DML::%s: MaxTotalRDBandwidth:%f AvailReturnBandwidth:%f\n", __func__, MaxTotalRDBandwidth, v->ReturnBW); + dml_print("DML::%s: VRatioPrefetch %s more than 4\n", __func__, (VRatioPrefetchMoreThan4) ? "is" : "is not"); + dml_print("DML::%s: DestinationLines for Prefetch %s less than 2\n", __func__, (DestinationLineTimesForPrefetchLessThan2) ? "is" : "is not"); + } + + // PREVIOUS_ERROR + // This error result check was done after the PrefetchModeSupported. So we will + // still try to calculate flip schedule even prefetch mode not supported + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + if (v->ErrorResult[k] == true || v->NotEnoughTimeForDynamicMetadata[k] == true) { + v->PrefetchModeSupported = false; + dml_print("DML::%s: ***failed***. Prefetch schedule violation. Results are NOT valid\n", __func__); + } + } + + if (v->PrefetchModeSupported == true && v->ImmediateFlipSupport == true) { + v->BandwidthAvailableForImmediateFlip = v->ReturnBW; + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip + - dml_max( + v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k] + + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k] + + v->cursor_bw[k] * v->UrgBurstFactorCursor[k], + v->DPPPerPlane[k] + * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k] + + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k]) + + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]); + } + + v->TotImmediateFlipBytes = 0; + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + v->TotImmediateFlipBytes = v->TotImmediateFlipBytes + + v->DPPPerPlane[k] * (v->PDEAndMetaPTEBytesFrame[k] + v->MetaRowByte[k] + v->PixelPTEBytesPerRow[k]); + } + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + CalculateFlipSchedule( + mode_lib, + k, + HostVMInefficiencyFactor, + v->UrgentExtraLatency, + v->UrgentLatency, + v->PDEAndMetaPTEBytesFrame[k], + v->MetaRowByte[k], + v->PixelPTEBytesPerRow[k]); + } + + v->total_dcn_read_bw_with_flip = 0.0; + v->total_dcn_read_bw_with_flip_no_urgent_burst = 0.0; + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip + + dml_max3( + v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k], + v->DPPPerPlane[k] * v->final_flip_bw[k] + + v->ReadBandwidthLuma[k] * v->UrgBurstFactorLuma[k] + + v->ReadBandwidthChroma[k] * v->UrgBurstFactorChroma[k] + + v->cursor_bw[k] * v->UrgBurstFactorCursor[k], + v->DPPPerPlane[k] + * (v->final_flip_bw[k] + + v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k] + + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k]) + + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]); + v->total_dcn_read_bw_with_flip_no_urgent_burst = v->total_dcn_read_bw_with_flip_no_urgent_burst + + dml_max3( + v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k], + v->DPPPerPlane[k] * v->final_flip_bw[k] + v->ReadBandwidthPlaneLuma[k] + + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k], + v->DPPPerPlane[k] + * (v->final_flip_bw[k] + v->RequiredPrefetchPixDataBWLuma[k] + + v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]); + } + v->FractionOfUrgentBandwidthImmediateFlip = v->total_dcn_read_bw_with_flip_no_urgent_burst / v->ReturnBW; + + v->ImmediateFlipSupported = true; + if (v->total_dcn_read_bw_with_flip > v->ReturnBW) { +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: total_dcn_read_bw_with_flip %f (bw w/ flip too high!)\n", __func__, v->total_dcn_read_bw_with_flip); +#endif + v->ImmediateFlipSupported = false; + v->total_dcn_read_bw_with_flip = MaxTotalRDBandwidth; + } + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + if (v->ImmediateFlipSupportedForPipe[k] == false) { +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: Pipe %0d not supporting iflip\n", + __func__, k); +#endif + v->ImmediateFlipSupported = false; + } + } + } else { + v->ImmediateFlipSupported = false; + } + + v->PrefetchAndImmediateFlipSupported = + (v->PrefetchModeSupported == true && ((!v->ImmediateFlipSupport && !v->HostVMEnable + && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) || + v->ImmediateFlipSupported)) ? true : false; +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: PrefetchModeSupported %d\n", __func__, v->PrefetchModeSupported); + dml_print("DML::%s: ImmediateFlipRequirement[0] %d\n", __func__, v->ImmediateFlipRequirement[0] == dm_immediate_flip_required); + dml_print("DML::%s: ImmediateFlipSupported %d\n", __func__, v->ImmediateFlipSupported); + dml_print("DML::%s: ImmediateFlipSupport %d\n", __func__, v->ImmediateFlipSupport); + dml_print("DML::%s: HostVMEnable %d\n", __func__, v->HostVMEnable); + dml_print("DML::%s: PrefetchAndImmediateFlipSupported %d\n", __func__, v->PrefetchAndImmediateFlipSupported); +#endif + dml_print("DML::%s: Done loop: Vstartup=%d, Max Vstartup is %d\n", __func__, v->VStartupLines, v->MaximumMaxVStartupLines); + + v->VStartupLines = v->VStartupLines + 1; + } while (!v->PrefetchAndImmediateFlipSupported && v->VStartupLines <= v->MaximumMaxVStartupLines); + ASSERT(v->PrefetchAndImmediateFlipSupported); + + // Unbounded Request Enabled + CalculateUnboundedRequestAndCompressedBufferSize( + v->DETBufferSizeInKByte[0], + v->ConfigReturnBufferSizeInKByte, + v->UseUnboundedRequesting, + v->TotalActiveDPP, + NoChromaPlanes, + v->MaxNumDPP, + v->CompressedBufferSegmentSizeInkByte, + v->Output, + &v->UnboundedRequestEnabled, + &v->CompressedBufferSizeInkByte); + + //Watermarks and NB P-State/DRAM Clock Change Support + { + enum clock_change_support DRAMClockChangeSupport; // dummy + CalculateWatermarksAndDRAMSpeedChangeSupport( + mode_lib, + PrefetchMode, + v->DCFCLK, + v->ReturnBW, + v->UrgentLatency, + v->UrgentExtraLatency, + v->SOCCLK, + v->DCFCLKDeepSleep, + v->DETBufferSizeY, + v->DETBufferSizeC, + v->SwathHeightY, + v->SwathHeightC, + v->SwathWidthY, + v->SwathWidthC, + v->DPPPerPlane, + v->BytePerPixelDETY, + v->BytePerPixelDETC, + v->UnboundedRequestEnabled, + v->CompressedBufferSizeInkByte, + &DRAMClockChangeSupport, + &v->StutterExitWatermark, + &v->StutterEnterPlusExitWatermark, + &v->Z8StutterExitWatermark, + &v->Z8StutterEnterPlusExitWatermark); + + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + if (v->WritebackEnable[k] == true) { + v->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max( + 0, + v->VStartup[k] * v->HTotal[k] / v->PixelClock[k] - v->WritebackDRAMClockChangeWatermark); + } else { + v->WritebackAllowDRAMClockChangeEndPosition[k] = 0; + } + } + } + + //Display Pipeline Delivery Time in Prefetch, Groups + CalculatePixelDeliveryTimes( + v->NumberOfActivePlanes, + v->VRatio, + v->VRatioChroma, + v->VRatioPrefetchY, + v->VRatioPrefetchC, + v->swath_width_luma_ub, + v->swath_width_chroma_ub, + v->DPPPerPlane, + v->HRatio, + v->HRatioChroma, + v->PixelClock, + v->PSCL_THROUGHPUT_LUMA, + v->PSCL_THROUGHPUT_CHROMA, + v->DPPCLK, + v->BytePerPixelC, + v->SourceScan, + v->NumberOfCursors, + v->CursorWidth, + v->CursorBPP, + v->BlockWidth256BytesY, + v->BlockHeight256BytesY, + v->BlockWidth256BytesC, + v->BlockHeight256BytesC, + v->DisplayPipeLineDeliveryTimeLuma, + v->DisplayPipeLineDeliveryTimeChroma, + v->DisplayPipeLineDeliveryTimeLumaPrefetch, + v->DisplayPipeLineDeliveryTimeChromaPrefetch, + v->DisplayPipeRequestDeliveryTimeLuma, + v->DisplayPipeRequestDeliveryTimeChroma, + v->DisplayPipeRequestDeliveryTimeLumaPrefetch, + v->DisplayPipeRequestDeliveryTimeChromaPrefetch, + v->CursorRequestDeliveryTime, + v->CursorRequestDeliveryTimePrefetch); + + CalculateMetaAndPTETimes( + v->NumberOfActivePlanes, + v->GPUVMEnable, + v->MetaChunkSize, + v->MinMetaChunkSizeBytes, + v->HTotal, + v->VRatio, + v->VRatioChroma, + v->DestinationLinesToRequestRowInVBlank, + v->DestinationLinesToRequestRowInImmediateFlip, + v->DCCEnable, + v->PixelClock, + v->BytePerPixelY, + v->BytePerPixelC, + v->SourceScan, + v->dpte_row_height, + v->dpte_row_height_chroma, + v->meta_row_width, + v->meta_row_width_chroma, + v->meta_row_height, + v->meta_row_height_chroma, + v->meta_req_width, + v->meta_req_width_chroma, + v->meta_req_height, + v->meta_req_height_chroma, + v->dpte_group_bytes, + v->PTERequestSizeY, + v->PTERequestSizeC, + v->PixelPTEReqWidthY, + v->PixelPTEReqHeightY, + v->PixelPTEReqWidthC, + v->PixelPTEReqHeightC, + v->dpte_row_width_luma_ub, + v->dpte_row_width_chroma_ub, + v->DST_Y_PER_PTE_ROW_NOM_L, + v->DST_Y_PER_PTE_ROW_NOM_C, + v->DST_Y_PER_META_ROW_NOM_L, + v->DST_Y_PER_META_ROW_NOM_C, + v->TimePerMetaChunkNominal, + v->TimePerChromaMetaChunkNominal, + v->TimePerMetaChunkVBlank, + v->TimePerChromaMetaChunkVBlank, + v->TimePerMetaChunkFlip, + v->TimePerChromaMetaChunkFlip, + v->time_per_pte_group_nom_luma, + v->time_per_pte_group_vblank_luma, + v->time_per_pte_group_flip_luma, + v->time_per_pte_group_nom_chroma, + v->time_per_pte_group_vblank_chroma, + v->time_per_pte_group_flip_chroma); + + CalculateVMGroupAndRequestTimes( + v->NumberOfActivePlanes, + v->GPUVMEnable, + v->GPUVMMaxPageTableLevels, + v->HTotal, + v->BytePerPixelC, + v->DestinationLinesToRequestVMInVBlank, + v->DestinationLinesToRequestVMInImmediateFlip, + v->DCCEnable, + v->PixelClock, + v->dpte_row_width_luma_ub, + v->dpte_row_width_chroma_ub, + v->vm_group_bytes, + v->dpde0_bytes_per_frame_ub_l, + v->dpde0_bytes_per_frame_ub_c, + v->meta_pte_bytes_per_frame_ub_l, + v->meta_pte_bytes_per_frame_ub_c, + v->TimePerVMGroupVBlank, + v->TimePerVMGroupFlip, + v->TimePerVMRequestVBlank, + v->TimePerVMRequestFlip); + + // Min TTUVBlank + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + if (PrefetchMode == 0) { + v->AllowDRAMClockChangeDuringVBlank[k] = true; + v->AllowDRAMSelfRefreshDuringVBlank[k] = true; + v->MinTTUVBlank[k] = dml_max( + v->DRAMClockChangeWatermark, + dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark)); + } else if (PrefetchMode == 1) { + v->AllowDRAMClockChangeDuringVBlank[k] = false; + v->AllowDRAMSelfRefreshDuringVBlank[k] = true; + v->MinTTUVBlank[k] = dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark); + } else { + v->AllowDRAMClockChangeDuringVBlank[k] = false; + v->AllowDRAMSelfRefreshDuringVBlank[k] = false; + v->MinTTUVBlank[k] = v->UrgentWatermark; + } + if (!v->DynamicMetadataEnable[k]) + v->MinTTUVBlank[k] = v->TCalc + v->MinTTUVBlank[k]; + } + + // DCC Configuration + v->ActiveDPPs = 0; + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + CalculateDCCConfiguration(v->DCCEnable[k], false, // We should always know the direction DCCProgrammingAssumesScanDirectionUnknown, + v->SourcePixelFormat[k], + v->SurfaceWidthY[k], + v->SurfaceWidthC[k], + v->SurfaceHeightY[k], + v->SurfaceHeightC[k], + v->DETBufferSizeInKByte[k] * 1024, + v->BlockHeight256BytesY[k], + v->BlockHeight256BytesC[k], + v->SurfaceTiling[k], + v->BytePerPixelY[k], + v->BytePerPixelC[k], + v->BytePerPixelDETY[k], + v->BytePerPixelDETC[k], + v->SourceScan[k], + &v->DCCYMaxUncompressedBlock[k], + &v->DCCCMaxUncompressedBlock[k], + &v->DCCYMaxCompressedBlock[k], + &v->DCCCMaxCompressedBlock[k], + &v->DCCYIndependentBlock[k], + &v->DCCCIndependentBlock[k]); + } + + // VStartup Adjustment + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + bool isInterlaceTiming; + double Tvstartup_margin = (v->MaxVStartupLines[k] - v->VStartup[k]) * v->HTotal[k] / v->PixelClock[k]; +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%d, MinTTUVBlank = %f (before margin)\n", __func__, k, v->MinTTUVBlank[k]); +#endif + + v->MinTTUVBlank[k] = v->MinTTUVBlank[k] + Tvstartup_margin; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%d, Tvstartup_margin = %f\n", __func__, k, Tvstartup_margin); + dml_print("DML::%s: k=%d, MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]); + dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]); + dml_print("DML::%s: k=%d, MinTTUVBlank = %f\n", __func__, k, v->MinTTUVBlank[k]); +#endif + + v->Tdmdl[k] = v->Tdmdl[k] + Tvstartup_margin; + if (v->DynamicMetadataEnable[k] && v->DynamicMetadataVMEnabled) { + v->Tdmdl_vm[k] = v->Tdmdl_vm[k] + Tvstartup_margin; + } + + isInterlaceTiming = (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP); + + v->MIN_DST_Y_NEXT_START[k] = ((isInterlaceTiming ? dml_floor((v->VTotal[k] - v->VFrontPorch[k]) / 2.0, 1.0) : v->VTotal[k]) + - v->VFrontPorch[k]) + + dml_max(1.0, dml_ceil(v->WritebackDelay[v->VoltageLevel][k] / (v->HTotal[k] / v->PixelClock[k]), 1.0)) + + dml_floor(4.0 * v->TSetup[k] / (v->HTotal[k] / v->PixelClock[k]), 1.0) / 4.0; + + v->VStartup[k] = (isInterlaceTiming ? (2 * v->MaxVStartupLines[k]) : v->MaxVStartupLines[k]); + + if (((v->VUpdateOffsetPix[k] + v->VUpdateWidthPix[k] + v->VReadyOffsetPix[k]) / v->HTotal[k]) + <= (isInterlaceTiming ? + dml_floor((v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]) / 2.0, 1.0) : + (int) (v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]))) { + v->VREADY_AT_OR_AFTER_VSYNC[k] = true; + } else { + v->VREADY_AT_OR_AFTER_VSYNC[k] = false; + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%d, VStartup = %d (max)\n", __func__, k, v->VStartup[k]); + dml_print("DML::%s: k=%d, VUpdateOffsetPix = %d\n", __func__, k, v->VUpdateOffsetPix[k]); + dml_print("DML::%s: k=%d, VUpdateWidthPix = %d\n", __func__, k, v->VUpdateWidthPix[k]); + dml_print("DML::%s: k=%d, VReadyOffsetPix = %d\n", __func__, k, v->VReadyOffsetPix[k]); + dml_print("DML::%s: k=%d, HTotal = %d\n", __func__, k, v->HTotal[k]); + dml_print("DML::%s: k=%d, VTotal = %d\n", __func__, k, v->VTotal[k]); + dml_print("DML::%s: k=%d, VActive = %d\n", __func__, k, v->VActive[k]); + dml_print("DML::%s: k=%d, VFrontPorch = %d\n", __func__, k, v->VFrontPorch[k]); + dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]); + dml_print("DML::%s: k=%d, MIN_DST_Y_NEXT_START = %f\n", __func__, k, v->MIN_DST_Y_NEXT_START[k]); + dml_print("DML::%s: k=%d, VREADY_AT_OR_AFTER_VSYNC = %d\n", __func__, k, v->VREADY_AT_OR_AFTER_VSYNC[k]); +#endif + } + + { + //Maximum Bandwidth Used + double TotalWRBandwidth = 0; + double MaxPerPlaneVActiveWRBandwidth = 0; + double WRBandwidth = 0; + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_32) { + WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] + / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 4; + } else if (v->WritebackEnable[k] == true) { + WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] + / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 8; + } + TotalWRBandwidth = TotalWRBandwidth + WRBandwidth; + MaxPerPlaneVActiveWRBandwidth = dml_max(MaxPerPlaneVActiveWRBandwidth, WRBandwidth); + } + + v->TotalDataReadBandwidth = 0; + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + v->TotalDataReadBandwidth = v->TotalDataReadBandwidth + v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]; + } + } + // Stutter Efficiency + CalculateStutterEfficiency( + mode_lib, + v->CompressedBufferSizeInkByte, + v->UnboundedRequestEnabled, + v->ConfigReturnBufferSizeInKByte, + v->MetaFIFOSizeInKEntries, + v->ZeroSizeBufferEntries, + v->NumberOfActivePlanes, + v->ROBBufferSizeInKByte, + v->TotalDataReadBandwidth, + v->DCFCLK, + v->ReturnBW, + v->COMPBUF_RESERVED_SPACE_64B, + v->COMPBUF_RESERVED_SPACE_ZS, + v->SRExitTime, + v->SRExitZ8Time, + v->SynchronizedVBlank, + v->StutterEnterPlusExitWatermark, + v->Z8StutterEnterPlusExitWatermark, + v->ProgressiveToInterlaceUnitInOPP, + v->Interlace, + v->MinTTUVBlank, + v->DPPPerPlane, + v->DETBufferSizeY, + v->BytePerPixelY, + v->BytePerPixelDETY, + v->SwathWidthY, + v->SwathHeightY, + v->SwathHeightC, + v->DCCRateLuma, + v->DCCRateChroma, + v->DCCFractionOfZeroSizeRequestsLuma, + v->DCCFractionOfZeroSizeRequestsChroma, + v->HTotal, + v->VTotal, + v->PixelClock, + v->VRatio, + v->SourceScan, + v->BlockHeight256BytesY, + v->BlockWidth256BytesY, + v->BlockHeight256BytesC, + v->BlockWidth256BytesC, + v->DCCYMaxUncompressedBlock, + v->DCCCMaxUncompressedBlock, + v->VActive, + v->DCCEnable, + v->WritebackEnable, + v->ReadBandwidthPlaneLuma, + v->ReadBandwidthPlaneChroma, + v->meta_row_bw, + v->dpte_row_bw, + &v->StutterEfficiencyNotIncludingVBlank, + &v->StutterEfficiency, + &v->NumberOfStutterBurstsPerFrame, + &v->Z8StutterEfficiencyNotIncludingVBlank, + &v->Z8StutterEfficiency, + &v->Z8NumberOfStutterBurstsPerFrame, + &v->StutterPeriod); +} + +static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib) +{ + struct vba_vars_st *v = &mode_lib->vba; + // Display Pipe Configuration + double BytePerPixDETY[DC__NUM_DPP__MAX]; + double BytePerPixDETC[DC__NUM_DPP__MAX]; + int BytePerPixY[DC__NUM_DPP__MAX]; + int BytePerPixC[DC__NUM_DPP__MAX]; + int Read256BytesBlockHeightY[DC__NUM_DPP__MAX]; + int Read256BytesBlockHeightC[DC__NUM_DPP__MAX]; + int Read256BytesBlockWidthY[DC__NUM_DPP__MAX]; + int Read256BytesBlockWidthC[DC__NUM_DPP__MAX]; + double dummy1[DC__NUM_DPP__MAX]; + double dummy2[DC__NUM_DPP__MAX]; + double dummy3[DC__NUM_DPP__MAX]; + double dummy4[DC__NUM_DPP__MAX]; + int dummy5[DC__NUM_DPP__MAX]; + int dummy6[DC__NUM_DPP__MAX]; + bool dummy7[DC__NUM_DPP__MAX]; + bool dummysinglestring; + + unsigned int k; + + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + + dml30_CalculateBytePerPixelAnd256BBlockSizes( + v->SourcePixelFormat[k], + v->SurfaceTiling[k], + &BytePerPixY[k], + &BytePerPixC[k], + &BytePerPixDETY[k], + &BytePerPixDETC[k], + &Read256BytesBlockHeightY[k], + &Read256BytesBlockHeightC[k], + &Read256BytesBlockWidthY[k], + &Read256BytesBlockWidthC[k]); + } + + CalculateSwathAndDETConfiguration( + false, + v->NumberOfActivePlanes, + mode_lib->project == DML_PROJECT_DCN315 && v->DETSizeOverride[0], + v->DETBufferSizeInKByte, + dummy1, + dummy2, + v->SourceScan, + v->SourcePixelFormat, + v->SurfaceTiling, + v->ViewportWidth, + v->ViewportHeight, + v->SurfaceWidthY, + v->SurfaceWidthC, + v->SurfaceHeightY, + v->SurfaceHeightC, + Read256BytesBlockHeightY, + Read256BytesBlockHeightC, + Read256BytesBlockWidthY, + Read256BytesBlockWidthC, + v->ODMCombineEnabled, + v->BlendingAndTiming, + BytePerPixY, + BytePerPixC, + BytePerPixDETY, + BytePerPixDETC, + v->HActive, + v->HRatio, + v->HRatioChroma, + v->DPPPerPlane, + dummy5, + dummy6, + dummy3, + dummy4, + v->SwathHeightY, + v->SwathHeightC, + v->DETBufferSizeY, + v->DETBufferSizeC, + dummy7, + &dummysinglestring); +} + +static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime) +{ + if (PrefetchMode == 0) { + return dml_max(DRAMClockChangeLatency + UrgentLatency, dml_max(SREnterPlusExitTime, UrgentLatency)); + } else if (PrefetchMode == 1) { + return dml_max(SREnterPlusExitTime, UrgentLatency); + } else { + return UrgentLatency; + } +} + +double dml31_CalculateWriteBackDISPCLK( + enum source_format_class WritebackPixelFormat, + double PixelClock, + double WritebackHRatio, + double WritebackVRatio, + unsigned int WritebackHTaps, + unsigned int WritebackVTaps, + long WritebackSourceWidth, + long WritebackDestinationWidth, + unsigned int HTotal, + unsigned int WritebackLineBufferSize) +{ + double DISPCLK_H, DISPCLK_V, DISPCLK_HB; + + DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio; + DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal; + DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth; + return dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB); +} + +static double CalculateWriteBackDelay( + enum source_format_class WritebackPixelFormat, + double WritebackHRatio, + double WritebackVRatio, + unsigned int WritebackVTaps, + int WritebackDestinationWidth, + int WritebackDestinationHeight, + int WritebackSourceHeight, + unsigned int HTotal) +{ + double CalculateWriteBackDelay; + double Line_length; + double Output_lines_last_notclamped; + double WritebackVInit; + + WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2; + Line_length = dml_max((double) WritebackDestinationWidth, dml_ceil(WritebackDestinationWidth / 6.0, 1) * WritebackVTaps); + Output_lines_last_notclamped = WritebackDestinationHeight - 1 - dml_ceil((WritebackSourceHeight - WritebackVInit) / WritebackVRatio, 1); + if (Output_lines_last_notclamped < 0) { + CalculateWriteBackDelay = 0; + } else { + CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length + (HTotal - WritebackDestinationWidth) + 80; + } + return CalculateWriteBackDelay; +} + +static void CalculateVupdateAndDynamicMetadataParameters( + int MaxInterDCNTileRepeaters, + double DPPCLK, + double DISPCLK, + double DCFClkDeepSleep, + double PixelClock, + int HTotal, + int VBlank, + int DynamicMetadataTransmittedBytes, + int DynamicMetadataLinesBeforeActiveRequired, + int InterlaceEnable, + bool ProgressiveToInterlaceUnitInOPP, + double *TSetup, + double *Tdmbf, + double *Tdmec, + double *Tdmsks, + int *VUpdateOffsetPix, + double *VUpdateWidthPix, + double *VReadyOffsetPix) +{ + double TotalRepeaterDelayTime; + + TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK); + *VUpdateWidthPix = dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / DPPCLK + TotalRepeaterDelayTime) * PixelClock, 1.0); + *VReadyOffsetPix = dml_ceil(dml_max(150.0 / DPPCLK, TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / DPPCLK) * PixelClock, 1.0); + *VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1); + *TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock; + *Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK; + *Tdmec = HTotal / PixelClock; + if (DynamicMetadataLinesBeforeActiveRequired == 0) { + *Tdmsks = VBlank * HTotal / PixelClock / 2.0; + } else { + *Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock; + } + if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false) { + *Tdmsks = *Tdmsks / 2; + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: VUpdateWidthPix = %d\n", __func__, *VUpdateWidthPix); + dml_print("DML::%s: VReadyOffsetPix = %d\n", __func__, *VReadyOffsetPix); + dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix); +#endif +} + +static void CalculateRowBandwidth( + bool GPUVMEnable, + enum source_format_class SourcePixelFormat, + double VRatio, + double VRatioChroma, + bool DCCEnable, + double LineTime, + unsigned int MetaRowByteLuma, + unsigned int MetaRowByteChroma, + unsigned int meta_row_height_luma, + unsigned int meta_row_height_chroma, + unsigned int PixelPTEBytesPerRowLuma, + unsigned int PixelPTEBytesPerRowChroma, + unsigned int dpte_row_height_luma, + unsigned int dpte_row_height_chroma, + double *meta_row_bw, + double *dpte_row_bw) +{ + if (DCCEnable != true) { + *meta_row_bw = 0; + } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) { + *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma * MetaRowByteChroma / (meta_row_height_chroma * LineTime); + } else { + *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime); + } + + if (GPUVMEnable != true) { + *dpte_row_bw = 0; + } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) { + *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) + + VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime); + } else { + *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime); + } +} + +static void CalculateFlipSchedule( + struct display_mode_lib *mode_lib, + unsigned int k, + double HostVMInefficiencyFactor, + double UrgentExtraLatency, + double UrgentLatency, + double PDEAndMetaPTEBytesPerFrame, + double MetaRowBytes, + double DPTEBytesPerRow) +{ + struct vba_vars_st *v = &mode_lib->vba; + double min_row_time = 0.0; + unsigned int HostVMDynamicLevelsTrips; + double TimeForFetchingMetaPTEImmediateFlip; + double TimeForFetchingRowInVBlankImmediateFlip; + double ImmediateFlipBW; + double LineTime = v->HTotal[k] / v->PixelClock[k]; + + if (v->GPUVMEnable == true && v->HostVMEnable == true) { + HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels; + } else { + HostVMDynamicLevelsTrips = 0; + } + + if (v->GPUVMEnable == true || v->DCCEnable[k] == true) { + ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * v->BandwidthAvailableForImmediateFlip / v->TotImmediateFlipBytes; + } + + if (v->GPUVMEnable == true) { + TimeForFetchingMetaPTEImmediateFlip = dml_max3( + v->Tno_bw[k] + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW, + UrgentExtraLatency + UrgentLatency * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1), + LineTime / 4.0); + } else { + TimeForFetchingMetaPTEImmediateFlip = 0; + } + + v->DestinationLinesToRequestVMInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0; + if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) { + TimeForFetchingRowInVBlankImmediateFlip = dml_max3( + (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW, + UrgentLatency * (HostVMDynamicLevelsTrips + 1), + LineTime / 4); + } else { + TimeForFetchingRowInVBlankImmediateFlip = 0; + } + + v->DestinationLinesToRequestRowInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0; + + if (v->GPUVMEnable == true) { + v->final_flip_bw[k] = dml_max( + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (v->DestinationLinesToRequestVMInImmediateFlip[k] * LineTime), + (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime)); + } else if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) { + v->final_flip_bw[k] = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime); + } else { + v->final_flip_bw[k] = 0; + } + + if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_rgbe_alpha) { + if (v->GPUVMEnable == true && v->DCCEnable[k] != true) { + min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k]); + } else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) { + min_row_time = dml_min(v->meta_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]); + } else { + min_row_time = dml_min4( + v->dpte_row_height[k] * LineTime / v->VRatio[k], + v->meta_row_height[k] * LineTime / v->VRatio[k], + v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k], + v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]); + } + } else { + if (v->GPUVMEnable == true && v->DCCEnable[k] != true) { + min_row_time = v->dpte_row_height[k] * LineTime / v->VRatio[k]; + } else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) { + min_row_time = v->meta_row_height[k] * LineTime / v->VRatio[k]; + } else { + min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height[k] * LineTime / v->VRatio[k]); + } + } + + if (v->DestinationLinesToRequestVMInImmediateFlip[k] >= 32 || v->DestinationLinesToRequestRowInImmediateFlip[k] >= 16 + || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) { + v->ImmediateFlipSupportedForPipe[k] = false; + } else { + v->ImmediateFlipSupportedForPipe[k] = true; + } + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestVMInImmediateFlip[k]); + dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestRowInImmediateFlip[k]); + dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip); + dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", __func__, TimeForFetchingRowInVBlankImmediateFlip); + dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time); + dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, v->ImmediateFlipSupportedForPipe[k]); +#endif + +} + +static double TruncToValidBPP( + double LinkBitRate, + int Lanes, + int HTotal, + int HActive, + double PixelClock, + double DesiredBPP, + bool DSCEnable, + enum output_encoder_class Output, + enum output_format_class Format, + unsigned int DSCInputBitPerComponent, + int DSCSlices, + int AudioRate, + int AudioLayout, + enum odm_combine_mode ODMCombine) +{ + double MaxLinkBPP; + int MinDSCBPP; + double MaxDSCBPP; + int NonDSCBPP0; + int NonDSCBPP1; + int NonDSCBPP2; + + if (Format == dm_420) { + NonDSCBPP0 = 12; + NonDSCBPP1 = 15; + NonDSCBPP2 = 18; + MinDSCBPP = 6; + MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1 / 16; + } else if (Format == dm_444) { + NonDSCBPP0 = 24; + NonDSCBPP1 = 30; + NonDSCBPP2 = 36; + MinDSCBPP = 8; + MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16; + } else { + + NonDSCBPP0 = 16; + NonDSCBPP1 = 20; + NonDSCBPP2 = 24; + + if (Format == dm_n422) { + MinDSCBPP = 7; + MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0; + } else { + MinDSCBPP = 8; + MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0; + } + } + + if (DSCEnable && Output == dm_dp) { + MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100); + } else { + MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock; + } + + if (ODMCombine == dm_odm_combine_mode_4to1 && MaxLinkBPP > 16) { + MaxLinkBPP = 16; + } else if (ODMCombine == dm_odm_combine_mode_2to1 && MaxLinkBPP > 32) { + MaxLinkBPP = 32; + } + + if (DesiredBPP == 0) { + if (DSCEnable) { + if (MaxLinkBPP < MinDSCBPP) { + return BPP_INVALID; + } else if (MaxLinkBPP >= MaxDSCBPP) { + return MaxDSCBPP; + } else { + return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0; + } + } else { + if (MaxLinkBPP >= NonDSCBPP2) { + return NonDSCBPP2; + } else if (MaxLinkBPP >= NonDSCBPP1) { + return NonDSCBPP1; + } else if (MaxLinkBPP >= NonDSCBPP0) { + return 16.0; + } else { + return BPP_INVALID; + } + } + } else { + if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP <= NonDSCBPP0)) + || (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) { + return BPP_INVALID; + } else { + return DesiredBPP; + } + } + return BPP_INVALID; +} + +static noinline void CalculatePrefetchSchedulePerPlane( + struct display_mode_lib *mode_lib, + double HostVMInefficiencyFactor, + int i, + unsigned j, + unsigned k) +{ + struct vba_vars_st *v = &mode_lib->vba; + Pipe myPipe; + + myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k]; + myPipe.DISPCLK = v->RequiredDISPCLK[i][j]; + myPipe.PixelClock = v->PixelClock[k]; + myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j]; + myPipe.DPPPerPlane = v->NoOfDPP[i][j][k]; + myPipe.ScalerEnabled = v->ScalerEnabled[k]; + myPipe.VRatio = mode_lib->vba.VRatio[k]; + myPipe.VRatioChroma = mode_lib->vba.VRatioChroma[k]; + + myPipe.SourceScan = v->SourceScan[k]; + myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k]; + myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k]; + myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k]; + myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k]; + myPipe.InterlaceEnable = v->Interlace[k]; + myPipe.NumberOfCursors = v->NumberOfCursors[k]; + myPipe.VBlank = v->VTotal[k] - v->VActive[k]; + myPipe.HTotal = v->HTotal[k]; + myPipe.DCCEnable = v->DCCEnable[k]; + myPipe.ODMCombineIsEnabled = v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1 + || v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1; + myPipe.SourcePixelFormat = v->SourcePixelFormat[k]; + myPipe.BytePerPixelY = v->BytePerPixelY[k]; + myPipe.BytePerPixelC = v->BytePerPixelC[k]; + myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP; + v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule( + mode_lib, + HostVMInefficiencyFactor, + &myPipe, + v->DSCDelayPerState[i][k], + v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater, + v->DPPCLKDelaySCL, + v->DPPCLKDelaySCLLBOnly, + v->DPPCLKDelayCNVCCursor, + v->DISPCLKDelaySubtotal, + v->SwathWidthYThisState[k] / v->HRatio[k], + v->OutputFormat[k], + v->MaxInterDCNTileRepeaters, + dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]), + v->MaximumVStartup[i][j][k], + v->GPUVMMaxPageTableLevels, + v->GPUVMEnable, + v->HostVMEnable, + v->HostVMMaxNonCachedPageTableLevels, + v->HostVMMinPageSize, + v->DynamicMetadataEnable[k], + v->DynamicMetadataVMEnabled, + v->DynamicMetadataLinesBeforeActiveRequired[k], + v->DynamicMetadataTransmittedBytes[k], + v->UrgLatency[i], + v->ExtraLatency, + v->TimeCalc, + v->PDEAndMetaPTEBytesPerFrame[i][j][k], + v->MetaRowBytes[i][j][k], + v->DPTEBytesPerRow[i][j][k], + v->PrefetchLinesY[i][j][k], + v->SwathWidthYThisState[k], + v->PrefillY[k], + v->MaxNumSwY[k], + v->PrefetchLinesC[i][j][k], + v->SwathWidthCThisState[k], + v->PrefillC[k], + v->MaxNumSwC[k], + v->swath_width_luma_ub_this_state[k], + v->swath_width_chroma_ub_this_state[k], + v->SwathHeightYThisState[k], + v->SwathHeightCThisState[k], + v->TWait, + &v->DSTXAfterScaler[k], + &v->DSTYAfterScaler[k], + &v->LineTimesForPrefetch[k], + &v->PrefetchBW[k], + &v->LinesForMetaPTE[k], + &v->LinesForMetaAndDPTERow[k], + &v->VRatioPreY[i][j][k], + &v->VRatioPreC[i][j][k], + &v->RequiredPrefetchPixelDataBWLuma[i][j][k], + &v->RequiredPrefetchPixelDataBWChroma[i][j][k], + &v->NoTimeForDynamicMetadata[i][j][k], + &v->Tno_bw[k], + &v->prefetch_vmrow_bw[k], + &v->dummy7[k], + &v->dummy8[k], + &v->dummy13[k], + &v->VUpdateOffsetPix[k], + &v->VUpdateWidthPix[k], + &v->VReadyOffsetPix[k]); +} + +static void PatchDETBufferSizeInKByte(unsigned int NumberOfActivePlanes, int NoOfDPPThisState[], unsigned int config_return_buffer_size_in_kbytes, unsigned int DETBufferSizeInKByte[]) +{ + int i, total_pipes = 0; + for (i = 0; i < NumberOfActivePlanes; i++) + total_pipes += NoOfDPPThisState[i]; + DETBufferSizeInKByte[0] = ((config_return_buffer_size_in_kbytes - DCN3_15_MIN_COMPBUF_SIZE_KB) / 64 / total_pipes) * 64; + if (DETBufferSizeInKByte[0] > DCN3_15_MAX_DET_SIZE) + DETBufferSizeInKByte[0] = DCN3_15_MAX_DET_SIZE; + for (i = 1; i < NumberOfActivePlanes; i++) + DETBufferSizeInKByte[i] = DETBufferSizeInKByte[0]; +} + + +void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib) +{ + struct vba_vars_st *v = &mode_lib->vba; + + int i, j; + unsigned int k, m; + int ReorderingBytes; + int MinPrefetchMode = 0, MaxPrefetchMode = 2; + bool NoChroma = true; + bool EnoughWritebackUnits = true; + bool P2IWith420 = false; + bool DSCOnlyIfNecessaryWithBPP = false; + bool DSC422NativeNotSupported = false; + double MaxTotalVActiveRDBandwidth; + bool ViewportExceedsSurface = false; + bool FMTBufferExceeded = false; + + /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/ + + CalculateMinAndMaxPrefetchMode( + mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, + &MinPrefetchMode, &MaxPrefetchMode); + + /*Scale Ratio, taps Support Check*/ + + v->ScaleRatioAndTapsSupport = true; + for (k = 0; k < v->NumberOfActivePlanes; k++) { + if (v->ScalerEnabled[k] == false + && ((v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 + && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16 + && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe + && v->SourcePixelFormat[k] != dm_rgbe_alpha) || v->HRatio[k] != 1.0 || v->htaps[k] != 1.0 + || v->VRatio[k] != 1.0 || v->vtaps[k] != 1.0)) { + v->ScaleRatioAndTapsSupport = false; + } else if (v->vtaps[k] < 1.0 || v->vtaps[k] > 8.0 || v->htaps[k] < 1.0 || v->htaps[k] > 8.0 + || (v->htaps[k] > 1.0 && (v->htaps[k] % 2) == 1) || v->HRatio[k] > v->MaxHSCLRatio + || v->VRatio[k] > v->MaxVSCLRatio || v->HRatio[k] > v->htaps[k] + || v->VRatio[k] > v->vtaps[k] + || (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 + && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16 + && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe + && (v->VTAPsChroma[k] < 1 || v->VTAPsChroma[k] > 8 || v->HTAPsChroma[k] < 1 + || v->HTAPsChroma[k] > 8 || (v->HTAPsChroma[k] > 1 && v->HTAPsChroma[k] % 2 == 1) + || v->HRatioChroma[k] > v->MaxHSCLRatio + || v->VRatioChroma[k] > v->MaxVSCLRatio + || v->HRatioChroma[k] > v->HTAPsChroma[k] + || v->VRatioChroma[k] > v->VTAPsChroma[k]))) { + v->ScaleRatioAndTapsSupport = false; + } + } + /*Source Format, Pixel Format and Scan Support Check*/ + + v->SourceFormatPixelAndScanSupport = true; + for (k = 0; k < v->NumberOfActivePlanes; k++) { + if ((v->SurfaceTiling[k] == dm_sw_linear && (!(v->SourceScan[k] != dm_vert) || v->DCCEnable[k] == true)) + || ((v->SurfaceTiling[k] == dm_sw_64kb_d || v->SurfaceTiling[k] == dm_sw_64kb_d_t + || v->SurfaceTiling[k] == dm_sw_64kb_d_x) && !(v->SourcePixelFormat[k] == dm_444_64))) { + v->SourceFormatPixelAndScanSupport = false; + } + } + /*Bandwidth Support Check*/ + + for (k = 0; k < v->NumberOfActivePlanes; k++) { + dml30_CalculateBytePerPixelAnd256BBlockSizes( + v->SourcePixelFormat[k], + v->SurfaceTiling[k], + &v->BytePerPixelY[k], + &v->BytePerPixelC[k], + &v->BytePerPixelInDETY[k], + &v->BytePerPixelInDETC[k], + &v->Read256BlockHeightY[k], + &v->Read256BlockHeightC[k], + &v->Read256BlockWidthY[k], + &v->Read256BlockWidthC[k]); + } + for (k = 0; k < v->NumberOfActivePlanes; k++) { + if (v->SourceScan[k] != dm_vert) { + v->SwathWidthYSingleDPP[k] = v->ViewportWidth[k]; + v->SwathWidthCSingleDPP[k] = v->ViewportWidthChroma[k]; + } else { + v->SwathWidthYSingleDPP[k] = v->ViewportHeight[k]; + v->SwathWidthCSingleDPP[k] = v->ViewportHeightChroma[k]; + } + } + for (k = 0; k < v->NumberOfActivePlanes; k++) { + v->ReadBandwidthLuma[k] = v->SwathWidthYSingleDPP[k] * dml_ceil(v->BytePerPixelInDETY[k], 1.0) + / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k]; + v->ReadBandwidthChroma[k] = v->SwathWidthYSingleDPP[k] / 2 * dml_ceil(v->BytePerPixelInDETC[k], 2.0) + / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k] / 2.0; + } + for (k = 0; k < v->NumberOfActivePlanes; k++) { + if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_64) { + v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] + / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 8.0; + } else if (v->WritebackEnable[k] == true) { + v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] + / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4.0; + } else { + v->WriteBandwidth[k] = 0.0; + } + } + + /*Writeback Latency support check*/ + + v->WritebackLatencySupport = true; + for (k = 0; k < v->NumberOfActivePlanes; k++) { + if (v->WritebackEnable[k] == true && (v->WriteBandwidth[k] > v->WritebackInterfaceBufferSize * 1024 / v->WritebackLatency)) { + v->WritebackLatencySupport = false; + } + } + + /*Writeback Mode Support Check*/ + + v->TotalNumberOfActiveWriteback = 0; + for (k = 0; k < v->NumberOfActivePlanes; k++) { + if (v->WritebackEnable[k] == true) { + v->TotalNumberOfActiveWriteback = v->TotalNumberOfActiveWriteback + 1; + } + } + + if (v->TotalNumberOfActiveWriteback > v->MaxNumWriteback) { + EnoughWritebackUnits = false; + } + + /*Writeback Scale Ratio and Taps Support Check*/ + + v->WritebackScaleRatioAndTapsSupport = true; + for (k = 0; k < v->NumberOfActivePlanes; k++) { + if (v->WritebackEnable[k] == true) { + if (v->WritebackHRatio[k] > v->WritebackMaxHSCLRatio || v->WritebackVRatio[k] > v->WritebackMaxVSCLRatio + || v->WritebackHRatio[k] < v->WritebackMinHSCLRatio + || v->WritebackVRatio[k] < v->WritebackMinVSCLRatio + || v->WritebackHTaps[k] > v->WritebackMaxHSCLTaps + || v->WritebackVTaps[k] > v->WritebackMaxVSCLTaps + || v->WritebackHRatio[k] > v->WritebackHTaps[k] || v->WritebackVRatio[k] > v->WritebackVTaps[k] + || (v->WritebackHTaps[k] > 2.0 && ((v->WritebackHTaps[k] % 2) == 1))) { + v->WritebackScaleRatioAndTapsSupport = false; + } + if (2.0 * v->WritebackDestinationWidth[k] * (v->WritebackVTaps[k] - 1) * 57 > v->WritebackLineBufferSize) { + v->WritebackScaleRatioAndTapsSupport = false; + } + } + } + /*Maximum DISPCLK/DPPCLK Support check*/ + + v->WritebackRequiredDISPCLK = 0.0; + for (k = 0; k < v->NumberOfActivePlanes; k++) { + if (v->WritebackEnable[k] == true) { + v->WritebackRequiredDISPCLK = dml_max( + v->WritebackRequiredDISPCLK, + dml31_CalculateWriteBackDISPCLK( + v->WritebackPixelFormat[k], + v->PixelClock[k], + v->WritebackHRatio[k], + v->WritebackVRatio[k], + v->WritebackHTaps[k], + v->WritebackVTaps[k], + v->WritebackSourceWidth[k], + v->WritebackDestinationWidth[k], + v->HTotal[k], + v->WritebackLineBufferSize)); + } + } + for (k = 0; k < v->NumberOfActivePlanes; k++) { + if (v->HRatio[k] > 1.0) { + v->PSCL_FACTOR[k] = dml_min( + v->MaxDCHUBToPSCLThroughput, + v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1.0)); + } else { + v->PSCL_FACTOR[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput); + } + if (v->BytePerPixelC[k] == 0.0) { + v->PSCL_FACTOR_CHROMA[k] = 0.0; + v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k] + * dml_max3( + v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]), + v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k], + 1.0); + if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0) && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) { + v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k]; + } + } else { + if (v->HRatioChroma[k] > 1.0) { + v->PSCL_FACTOR_CHROMA[k] = dml_min( + v->MaxDCHUBToPSCLThroughput, + v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0)); + } else { + v->PSCL_FACTOR_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput); + } + v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k] + * dml_max5( + v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]), + v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k], + v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]), + v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_FACTOR_CHROMA[k], + 1.0); + if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0 || v->HTAPsChroma[k] > 6.0 || v->VTAPsChroma[k] > 6.0) + && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) { + v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k]; + } + } + } + for (k = 0; k < v->NumberOfActivePlanes; k++) { + int MaximumSwathWidthSupportLuma; + int MaximumSwathWidthSupportChroma; + + if (v->SurfaceTiling[k] == dm_sw_linear) { + MaximumSwathWidthSupportLuma = 8192.0; + } else if (v->SourceScan[k] == dm_vert && v->BytePerPixelC[k] > 0) { + MaximumSwathWidthSupportLuma = 2880.0; + } else if (v->SourcePixelFormat[k] == dm_rgbe_alpha) { + MaximumSwathWidthSupportLuma = 3840.0; + } else { + MaximumSwathWidthSupportLuma = 5760.0; + } + + if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) { + MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma / 2.0; + } else { + MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma; + } + v->MaximumSwathWidthInLineBufferLuma = v->LineBufferSize * dml_max(v->HRatio[k], 1.0) / v->LBBitPerPixel[k] + / (v->vtaps[k] + dml_max(dml_ceil(v->VRatio[k], 1.0) - 2, 0.0)); + if (v->BytePerPixelC[k] == 0.0) { + v->MaximumSwathWidthInLineBufferChroma = 0; + } else { + v->MaximumSwathWidthInLineBufferChroma = v->LineBufferSize * dml_max(v->HRatioChroma[k], 1.0) / v->LBBitPerPixel[k] + / (v->VTAPsChroma[k] + dml_max(dml_ceil(v->VRatioChroma[k], 1.0) - 2, 0.0)); + } + v->MaximumSwathWidthLuma[k] = dml_min(MaximumSwathWidthSupportLuma, v->MaximumSwathWidthInLineBufferLuma); + v->MaximumSwathWidthChroma[k] = dml_min(MaximumSwathWidthSupportChroma, v->MaximumSwathWidthInLineBufferChroma); + } + + CalculateSwathAndDETConfiguration( + true, + v->NumberOfActivePlanes, + mode_lib->project == DML_PROJECT_DCN315 && v->DETSizeOverride[0], + v->DETBufferSizeInKByte, + v->MaximumSwathWidthLuma, + v->MaximumSwathWidthChroma, + v->SourceScan, + v->SourcePixelFormat, + v->SurfaceTiling, + v->ViewportWidth, + v->ViewportHeight, + v->SurfaceWidthY, + v->SurfaceWidthC, + v->SurfaceHeightY, + v->SurfaceHeightC, + v->Read256BlockHeightY, + v->Read256BlockHeightC, + v->Read256BlockWidthY, + v->Read256BlockWidthC, + v->odm_combine_dummy, + v->BlendingAndTiming, + v->BytePerPixelY, + v->BytePerPixelC, + v->BytePerPixelInDETY, + v->BytePerPixelInDETC, + v->HActive, + v->HRatio, + v->HRatioChroma, + v->NoOfDPPThisState, + v->swath_width_luma_ub_this_state, + v->swath_width_chroma_ub_this_state, + v->SwathWidthYThisState, + v->SwathWidthCThisState, + v->SwathHeightYThisState, + v->SwathHeightCThisState, + v->DETBufferSizeYThisState, + v->DETBufferSizeCThisState, + v->SingleDPPViewportSizeSupportPerPlane, + &v->ViewportSizeSupport[0][0]); + + for (i = 0; i < v->soc.num_states; i++) { + for (j = 0; j < 2; j++) { + v->MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDispclk[i], v->DISPCLKDPPCLKVCOSpeed); + v->MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDppclk[i], v->DISPCLKDPPCLKVCOSpeed); + v->RequiredDISPCLK[i][j] = 0.0; + v->DISPCLK_DPPCLK_Support[i][j] = true; + for (k = 0; k < v->NumberOfActivePlanes; k++) { + v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) + * (1.0 + v->DISPCLKRampingMargin / 100.0); + if ((v->PlaneRequiredDISPCLKWithoutODMCombine >= v->MaxDispclk[i] + && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1] + && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) { + v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] + * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); + } + v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) + * (1 + v->DISPCLKRampingMargin / 100.0); + if ((v->PlaneRequiredDISPCLKWithODMCombine2To1 >= v->MaxDispclk[i] + && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1] + && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) { + v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 + * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); + } + v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) + * (1 + v->DISPCLKRampingMargin / 100.0); + if ((v->PlaneRequiredDISPCLKWithODMCombine4To1 >= v->MaxDispclk[i] + && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1] + && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) { + v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 + * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); + } + + if (v->ODMCombinePolicy == dm_odm_combine_policy_none + || !(v->Output[k] == dm_dp || + v->Output[k] == dm_dp2p0 || + v->Output[k] == dm_edp)) { + v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled; + v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine; + + if (v->HActive[k] / 2 > DCN31_MAX_FMT_420_BUFFER_WIDTH) + FMTBufferExceeded = true; + } else if (v->ODMCombinePolicy == dm_odm_combine_policy_2to1) { + v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; + v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1; + } else if (v->ODMCombinePolicy == dm_odm_combine_policy_4to1 + || v->PlaneRequiredDISPCLKWithODMCombine2To1 > v->MaxDispclkRoundedDownToDFSGranularity) { + v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1; + v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1; + } else if (v->PlaneRequiredDISPCLKWithoutODMCombine > v->MaxDispclkRoundedDownToDFSGranularity) { + v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; + v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1; + } else { + v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled; + v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine; + } + if (v->DSCEnabled[k] && v->HActive[k] > DCN31_MAX_DSC_IMAGE_WIDTH + && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) { + if (v->HActive[k] / 2 > DCN31_MAX_DSC_IMAGE_WIDTH) { + v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1; + v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1; + } else { + v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; + v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1; + } + } + if (v->OutputFormat[k] == dm_420 && v->HActive[k] > DCN31_MAX_FMT_420_BUFFER_WIDTH + && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) { + if (v->Output[k] == dm_hdmi) { + FMTBufferExceeded = true; + } else if (v->HActive[k] / 2 > DCN31_MAX_FMT_420_BUFFER_WIDTH) { + v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1; + v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1; + + if (v->HActive[k] / 4 > DCN31_MAX_FMT_420_BUFFER_WIDTH) + FMTBufferExceeded = true; + } else { + v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; + v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1; + } + } + if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) { + v->MPCCombine[i][j][k] = false; + v->NoOfDPP[i][j][k] = 4; + v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 4; + } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { + v->MPCCombine[i][j][k] = false; + v->NoOfDPP[i][j][k] = 2; + v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2; + } else if ((v->WhenToDoMPCCombine == dm_mpc_never + || (v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) + <= v->MaxDppclkRoundedDownToDFSGranularity && v->SingleDPPViewportSizeSupportPerPlane[k] == true))) { + v->MPCCombine[i][j][k] = false; + v->NoOfDPP[i][j][k] = 1; + v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); + } else { + v->MPCCombine[i][j][k] = true; + v->NoOfDPP[i][j][k] = 2; + v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0; + } + v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK); + if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) + > v->MaxDppclkRoundedDownToDFSGranularity) + || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) { + v->DISPCLK_DPPCLK_Support[i][j] = false; + } + if (mode_lib->project == DML_PROJECT_DCN315 && v->DETSizeOverride[k] > DCN3_15_MAX_DET_SIZE && v->NoOfDPP[i][j][k] < 2) { + v->MPCCombine[i][j][k] = true; + v->NoOfDPP[i][j][k] = 2; + } + } + v->TotalNumberOfActiveDPP[i][j] = 0; + v->TotalNumberOfSingleDPPPlanes[i][j] = 0; + for (k = 0; k < v->NumberOfActivePlanes; k++) { + v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k]; + if (v->NoOfDPP[i][j][k] == 1) + v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] + 1; + if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 + || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) + NoChroma = false; + } + + // UPTO + if (j == 1 && v->WhenToDoMPCCombine != dm_mpc_never + && !UnboundedRequest(v->UseUnboundedRequesting, v->TotalNumberOfActiveDPP[i][j], NoChroma, v->Output[0])) { + while (!(v->TotalNumberOfActiveDPP[i][j] >= v->MaxNumDPP || v->TotalNumberOfSingleDPPPlanes[i][j] == 0)) { + double BWOfNonSplitPlaneOfMaximumBandwidth; + unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth; + BWOfNonSplitPlaneOfMaximumBandwidth = 0; + NumberOfNonSplitPlaneOfMaximumBandwidth = 0; + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + if (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k] > BWOfNonSplitPlaneOfMaximumBandwidth + && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled && v->MPCCombine[i][j][k] == false) { + BWOfNonSplitPlaneOfMaximumBandwidth = v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]; + NumberOfNonSplitPlaneOfMaximumBandwidth = k; + } + } + v->MPCCombine[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = true; + v->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2; + v->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = + v->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth] + * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2; + v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + 1; + v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] - 1; + } + } + if (v->TotalNumberOfActiveDPP[i][j] > v->MaxNumDPP) { + v->RequiredDISPCLK[i][j] = 0.0; + v->DISPCLK_DPPCLK_Support[i][j] = true; + for (k = 0; k < v->NumberOfActivePlanes; k++) { + v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled; + if (v->SingleDPPViewportSizeSupportPerPlane[k] == false && v->WhenToDoMPCCombine != dm_mpc_never) { + v->MPCCombine[i][j][k] = true; + v->NoOfDPP[i][j][k] = 2; + v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] + * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0; + } else { + v->MPCCombine[i][j][k] = false; + v->NoOfDPP[i][j][k] = 1; + v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] + * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); + } + if (!(v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1] + && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) { + v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) + * (1.0 + v->DISPCLKRampingMargin / 100.0); + } else { + v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); + } + v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK); + if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) + > v->MaxDppclkRoundedDownToDFSGranularity) + || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) { + v->DISPCLK_DPPCLK_Support[i][j] = false; + } + } + v->TotalNumberOfActiveDPP[i][j] = 0.0; + for (k = 0; k < v->NumberOfActivePlanes; k++) { + v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k]; + } + } + v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->WritebackRequiredDISPCLK); + if (v->MaxDispclkRoundedDownToDFSGranularity < v->WritebackRequiredDISPCLK) { + v->DISPCLK_DPPCLK_Support[i][j] = false; + } + } + } + + /*Total Available Pipes Support Check*/ + + for (i = 0; i < v->soc.num_states; i++) { + for (j = 0; j < 2; j++) { + if (v->TotalNumberOfActiveDPP[i][j] <= v->MaxNumDPP) { + v->TotalAvailablePipesSupport[i][j] = true; + } else { + v->TotalAvailablePipesSupport[i][j] = false; + } + } + } + /*Display IO and DSC Support Check*/ + + v->NonsupportedDSCInputBPC = false; + for (k = 0; k < v->NumberOfActivePlanes; k++) { + if (!(v->DSCInputBitPerComponent[k] == 12.0 || v->DSCInputBitPerComponent[k] == 10.0 || v->DSCInputBitPerComponent[k] == 8.0) + || v->DSCInputBitPerComponent[k] > v->MaximumDSCBitsPerComponent) { + v->NonsupportedDSCInputBPC = true; + } + } + + /*Number Of DSC Slices*/ + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + if (v->BlendingAndTiming[k] == k) { + if (v->PixelClockBackEnd[k] > 3200) { + v->NumberOfDSCSlices[k] = dml_ceil(v->PixelClockBackEnd[k] / 400.0, 4.0); + } else if (v->PixelClockBackEnd[k] > 1360) { + v->NumberOfDSCSlices[k] = 8; + } else if (v->PixelClockBackEnd[k] > 680) { + v->NumberOfDSCSlices[k] = 4; + } else if (v->PixelClockBackEnd[k] > 340) { + v->NumberOfDSCSlices[k] = 2; + } else { + v->NumberOfDSCSlices[k] = 1; + } + } else { + v->NumberOfDSCSlices[k] = 0; + } + } + + for (i = 0; i < v->soc.num_states; i++) { + for (k = 0; k < v->NumberOfActivePlanes; k++) { + v->RequiresDSC[i][k] = false; + v->RequiresFEC[i][k] = false; + if (v->BlendingAndTiming[k] == k) { + if (v->Output[k] == dm_hdmi) { + v->RequiresDSC[i][k] = false; + v->RequiresFEC[i][k] = false; + v->OutputBppPerState[i][k] = TruncToValidBPP( + dml_min(600.0, v->PHYCLKPerState[i]) * 10, + 3, + v->HTotal[k], + v->HActive[k], + v->PixelClockBackEnd[k], + v->ForcedOutputLinkBPP[k], + false, + v->Output[k], + v->OutputFormat[k], + v->DSCInputBitPerComponent[k], + v->NumberOfDSCSlices[k], + v->AudioSampleRate[k], + v->AudioSampleLayout[k], + v->ODMCombineEnablePerState[i][k]); + } else if (v->Output[k] == dm_dp || v->Output[k] == dm_edp || v->Output[k] == dm_dp2p0) { + if (v->DSCEnable[k] == true) { + v->RequiresDSC[i][k] = true; + v->LinkDSCEnable = true; + if (v->Output[k] == dm_dp || v->Output[k] == dm_dp2p0) { + v->RequiresFEC[i][k] = true; + } else { + v->RequiresFEC[i][k] = false; + } + } else { + v->RequiresDSC[i][k] = false; + v->LinkDSCEnable = false; + if (v->Output[k] == dm_dp2p0) { + v->RequiresFEC[i][k] = true; + } else { + v->RequiresFEC[i][k] = false; + } + } + if (v->Output[k] == dm_dp2p0) { + v->Outbpp = BPP_INVALID; + if ((v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr10) && + v->PHYCLKD18PerState[k] >= 10000.0 / 18.0) { + v->Outbpp = TruncToValidBPP( + (1.0 - v->Downspreading / 100.0) * 10000, + v->OutputLinkDPLanes[k], + v->HTotal[k], + v->HActive[k], + v->PixelClockBackEnd[k], + v->ForcedOutputLinkBPP[k], + v->LinkDSCEnable, + v->Output[k], + v->OutputFormat[k], + v->DSCInputBitPerComponent[k], + v->NumberOfDSCSlices[k], + v->AudioSampleRate[k], + v->AudioSampleLayout[k], + v->ODMCombineEnablePerState[i][k]); + if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[k] < 13500.0 / 18.0 && + v->DSCEnable[k] == true && v->ForcedOutputLinkBPP[k] == 0) { + v->RequiresDSC[i][k] = true; + v->LinkDSCEnable = true; + v->Outbpp = TruncToValidBPP( + (1.0 - v->Downspreading / 100.0) * 10000, + v->OutputLinkDPLanes[k], + v->HTotal[k], + v->HActive[k], + v->PixelClockBackEnd[k], + v->ForcedOutputLinkBPP[k], + v->LinkDSCEnable, + v->Output[k], + v->OutputFormat[k], + v->DSCInputBitPerComponent[k], + v->NumberOfDSCSlices[k], + v->AudioSampleRate[k], + v->AudioSampleLayout[k], + v->ODMCombineEnablePerState[i][k]); + } + v->OutputBppPerState[i][k] = v->Outbpp; + // TODO: Need some other way to handle this nonsense + // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR10" + } + if (v->Outbpp == BPP_INVALID && + (v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr13p5) && + v->PHYCLKD18PerState[k] >= 13500.0 / 18.0) { + v->Outbpp = TruncToValidBPP( + (1.0 - v->Downspreading / 100.0) * 13500, + v->OutputLinkDPLanes[k], + v->HTotal[k], + v->HActive[k], + v->PixelClockBackEnd[k], + v->ForcedOutputLinkBPP[k], + v->LinkDSCEnable, + v->Output[k], + v->OutputFormat[k], + v->DSCInputBitPerComponent[k], + v->NumberOfDSCSlices[k], + v->AudioSampleRate[k], + v->AudioSampleLayout[k], + v->ODMCombineEnablePerState[i][k]); + if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[k] < 20000.0 / 18.0 && + v->DSCEnable[k] == true && v->ForcedOutputLinkBPP[k] == 0) { + v->RequiresDSC[i][k] = true; + v->LinkDSCEnable = true; + v->Outbpp = TruncToValidBPP( + (1.0 - v->Downspreading / 100.0) * 13500, + v->OutputLinkDPLanes[k], + v->HTotal[k], + v->HActive[k], + v->PixelClockBackEnd[k], + v->ForcedOutputLinkBPP[k], + v->LinkDSCEnable, + v->Output[k], + v->OutputFormat[k], + v->DSCInputBitPerComponent[k], + v->NumberOfDSCSlices[k], + v->AudioSampleRate[k], + v->AudioSampleLayout[k], + v->ODMCombineEnablePerState[i][k]); + } + v->OutputBppPerState[i][k] = v->Outbpp; + // TODO: Need some other way to handle this nonsense + // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR13p5" + } + if (v->Outbpp == BPP_INVALID && + (v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr20) && + v->PHYCLKD18PerState[k] >= 20000.0 / 18.0) { + v->Outbpp = TruncToValidBPP( + (1.0 - v->Downspreading / 100.0) * 20000, + v->OutputLinkDPLanes[k], + v->HTotal[k], + v->HActive[k], + v->PixelClockBackEnd[k], + v->ForcedOutputLinkBPP[k], + v->LinkDSCEnable, + v->Output[k], + v->OutputFormat[k], + v->DSCInputBitPerComponent[k], + v->NumberOfDSCSlices[k], + v->AudioSampleRate[k], + v->AudioSampleLayout[k], + v->ODMCombineEnablePerState[i][k]); + if (v->Outbpp == BPP_INVALID && v->DSCEnable[k] == true && + v->ForcedOutputLinkBPP[k] == 0) { + v->RequiresDSC[i][k] = true; + v->LinkDSCEnable = true; + v->Outbpp = TruncToValidBPP( + (1.0 - v->Downspreading / 100.0) * 20000, + v->OutputLinkDPLanes[k], + v->HTotal[k], + v->HActive[k], + v->PixelClockBackEnd[k], + v->ForcedOutputLinkBPP[k], + v->LinkDSCEnable, + v->Output[k], + v->OutputFormat[k], + v->DSCInputBitPerComponent[k], + v->NumberOfDSCSlices[k], + v->AudioSampleRate[k], + v->AudioSampleLayout[k], + v->ODMCombineEnablePerState[i][k]); + } + v->OutputBppPerState[i][k] = v->Outbpp; + // TODO: Need some other way to handle this nonsense + // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR20" + } + } else { + v->Outbpp = BPP_INVALID; + if (v->PHYCLKPerState[i] >= 270.0) { + v->Outbpp = TruncToValidBPP( + (1.0 - v->Downspreading / 100.0) * 2700, + v->OutputLinkDPLanes[k], + v->HTotal[k], + v->HActive[k], + v->PixelClockBackEnd[k], + v->ForcedOutputLinkBPP[k], + v->LinkDSCEnable, + v->Output[k], + v->OutputFormat[k], + v->DSCInputBitPerComponent[k], + v->NumberOfDSCSlices[k], + v->AudioSampleRate[k], + v->AudioSampleLayout[k], + v->ODMCombineEnablePerState[i][k]); + v->OutputBppPerState[i][k] = v->Outbpp; + // TODO: Need some other way to handle this nonsense + // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR" + } + if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 540.0) { + v->Outbpp = TruncToValidBPP( + (1.0 - v->Downspreading / 100.0) * 5400, + v->OutputLinkDPLanes[k], + v->HTotal[k], + v->HActive[k], + v->PixelClockBackEnd[k], + v->ForcedOutputLinkBPP[k], + v->LinkDSCEnable, + v->Output[k], + v->OutputFormat[k], + v->DSCInputBitPerComponent[k], + v->NumberOfDSCSlices[k], + v->AudioSampleRate[k], + v->AudioSampleLayout[k], + v->ODMCombineEnablePerState[i][k]); + v->OutputBppPerState[i][k] = v->Outbpp; + // TODO: Need some other way to handle this nonsense + // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR2" + } + if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 810.0) { + v->Outbpp = TruncToValidBPP( + (1.0 - v->Downspreading / 100.0) * 8100, + v->OutputLinkDPLanes[k], + v->HTotal[k], + v->HActive[k], + v->PixelClockBackEnd[k], + v->ForcedOutputLinkBPP[k], + v->LinkDSCEnable, + v->Output[k], + v->OutputFormat[k], + v->DSCInputBitPerComponent[k], + v->NumberOfDSCSlices[k], + v->AudioSampleRate[k], + v->AudioSampleLayout[k], + v->ODMCombineEnablePerState[i][k]); + v->OutputBppPerState[i][k] = v->Outbpp; + // TODO: Need some other way to handle this nonsense + // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR3" + } + } + } + } else { + v->OutputBppPerState[i][k] = 0; + } + } + } + + for (i = 0; i < v->soc.num_states; i++) { + v->LinkCapacitySupport[i] = true; + for (k = 0; k < v->NumberOfActivePlanes; k++) { + if (v->BlendingAndTiming[k] == k + && (v->Output[k] == dm_dp || + v->Output[k] == dm_edp || + v->Output[k] == dm_hdmi) && v->OutputBppPerState[i][k] == 0) { + v->LinkCapacitySupport[i] = false; + } + } + } + + // UPTO 2172 + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + if (v->BlendingAndTiming[k] == k + && (v->Output[k] == dm_dp || + v->Output[k] == dm_edp || + v->Output[k] == dm_hdmi)) { + if (v->OutputFormat[k] == dm_420 && v->Interlace[k] == 1 && v->ProgressiveToInterlaceUnitInOPP == true) { + P2IWith420 = true; + } + if (v->DSCEnable[k] == true && v->OutputFormat[k] == dm_n422 + && !v->DSC422NativeSupport) { + DSC422NativeNotSupported = true; + } + } + } + + for (i = 0; i < v->soc.num_states; ++i) { + v->ODMCombine4To1SupportCheckOK[i] = true; + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + if (v->BlendingAndTiming[k] == k && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1 + && (v->ODMCombine4To1Supported == false || v->Output[k] == dm_dp || v->Output[k] == dm_edp + || v->Output[k] == dm_hdmi)) { + v->ODMCombine4To1SupportCheckOK[i] = false; + } + } + } + + /* Skip dscclk validation: as long as dispclk is supported, dscclk is also implicitly supported */ + + for (i = 0; i < v->soc.num_states; i++) { + v->NotEnoughDSCUnits[i] = false; + v->TotalDSCUnitsRequired = 0.0; + for (k = 0; k < v->NumberOfActivePlanes; k++) { + if (v->RequiresDSC[i][k] == true) { + if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) { + v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 4.0; + } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { + v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 2.0; + } else { + v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 1.0; + } + } + } + if (v->TotalDSCUnitsRequired > v->NumberOfDSC) { + v->NotEnoughDSCUnits[i] = true; + } + } + /*DSC Delay per state*/ + + for (i = 0; i < v->soc.num_states; i++) { + for (k = 0; k < v->NumberOfActivePlanes; k++) { + if (v->OutputBppPerState[i][k] == BPP_INVALID) { + v->BPP = 0.0; + } else { + v->BPP = v->OutputBppPerState[i][k]; + } + if (v->RequiresDSC[i][k] == true && v->BPP != 0.0) { + if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) { + v->DSCDelayPerState[i][k] = dscceComputeDelay( + v->DSCInputBitPerComponent[k], + v->BPP, + dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0), + v->NumberOfDSCSlices[k], + v->OutputFormat[k], + v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]); + } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { + v->DSCDelayPerState[i][k] = 2.0 + * (dscceComputeDelay( + v->DSCInputBitPerComponent[k], + v->BPP, + dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0), + v->NumberOfDSCSlices[k] / 2, + v->OutputFormat[k], + v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k])); + } else { + v->DSCDelayPerState[i][k] = 4.0 + * (dscceComputeDelay( + v->DSCInputBitPerComponent[k], + v->BPP, + dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0), + v->NumberOfDSCSlices[k] / 4, + v->OutputFormat[k], + v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k])); + } + v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][k] * v->PixelClock[k] / v->PixelClockBackEnd[k]; + } else { + v->DSCDelayPerState[i][k] = 0.0; + } + } + for (k = 0; k < v->NumberOfActivePlanes; k++) { + for (m = 0; m < v->NumberOfActivePlanes; m++) { + if (v->BlendingAndTiming[k] == m && v->RequiresDSC[i][m] == true) { + v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][m]; + } + } + } + } + + //Calculate Swath, DET Configuration, DCFCLKDeepSleep + // + for (i = 0; i < v->soc.num_states; ++i) { + for (j = 0; j <= 1; ++j) { + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + v->RequiredDPPCLKThisState[k] = v->RequiredDPPCLK[i][j][k]; + v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k]; + v->ODMCombineEnableThisState[k] = v->ODMCombineEnablePerState[i][k]; + } + + if (v->NumberOfActivePlanes > 1 && mode_lib->project == DML_PROJECT_DCN315 && !v->DETSizeOverride[0]) + PatchDETBufferSizeInKByte(v->NumberOfActivePlanes, v->NoOfDPPThisState, v->ip.config_return_buffer_size_in_kbytes, v->DETBufferSizeInKByte); + CalculateSwathAndDETConfiguration( + false, + v->NumberOfActivePlanes, + mode_lib->project == DML_PROJECT_DCN315 && v->DETSizeOverride[0], + v->DETBufferSizeInKByte, + v->MaximumSwathWidthLuma, + v->MaximumSwathWidthChroma, + v->SourceScan, + v->SourcePixelFormat, + v->SurfaceTiling, + v->ViewportWidth, + v->ViewportHeight, + v->SurfaceWidthY, + v->SurfaceWidthC, + v->SurfaceHeightY, + v->SurfaceHeightC, + v->Read256BlockHeightY, + v->Read256BlockHeightC, + v->Read256BlockWidthY, + v->Read256BlockWidthC, + v->ODMCombineEnableThisState, + v->BlendingAndTiming, + v->BytePerPixelY, + v->BytePerPixelC, + v->BytePerPixelInDETY, + v->BytePerPixelInDETC, + v->HActive, + v->HRatio, + v->HRatioChroma, + v->NoOfDPPThisState, + v->swath_width_luma_ub_this_state, + v->swath_width_chroma_ub_this_state, + v->SwathWidthYThisState, + v->SwathWidthCThisState, + v->SwathHeightYThisState, + v->SwathHeightCThisState, + v->DETBufferSizeYThisState, + v->DETBufferSizeCThisState, + v->dummystring, + &v->ViewportSizeSupport[i][j]); + + CalculateDCFCLKDeepSleep( + mode_lib, + v->NumberOfActivePlanes, + v->BytePerPixelY, + v->BytePerPixelC, + v->VRatio, + v->VRatioChroma, + v->SwathWidthYThisState, + v->SwathWidthCThisState, + v->NoOfDPPThisState, + v->HRatio, + v->HRatioChroma, + v->PixelClock, + v->PSCL_FACTOR, + v->PSCL_FACTOR_CHROMA, + v->RequiredDPPCLKThisState, + v->ReadBandwidthLuma, + v->ReadBandwidthChroma, + v->ReturnBusWidth, + &v->ProjectedDCFCLKDeepSleep[i][j]); + + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + v->swath_width_luma_ub_all_states[i][j][k] = v->swath_width_luma_ub_this_state[k]; + v->swath_width_chroma_ub_all_states[i][j][k] = v->swath_width_chroma_ub_this_state[k]; + v->SwathWidthYAllStates[i][j][k] = v->SwathWidthYThisState[k]; + v->SwathWidthCAllStates[i][j][k] = v->SwathWidthCThisState[k]; + v->SwathHeightYAllStates[i][j][k] = v->SwathHeightYThisState[k]; + v->SwathHeightCAllStates[i][j][k] = v->SwathHeightCThisState[k]; + v->DETBufferSizeYAllStates[i][j][k] = v->DETBufferSizeYThisState[k]; + v->DETBufferSizeCAllStates[i][j][k] = v->DETBufferSizeCThisState[k]; + } + } + } + + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 + / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k]; + } + + for (i = 0; i < v->soc.num_states; i++) { + for (j = 0; j < 2; j++) { + bool NotUrgentLatencyHiding[DC__NUM_DPP__MAX]; + + for (k = 0; k < v->NumberOfActivePlanes; k++) { + v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k]; + v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k]; + v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k]; + v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k]; + v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k]; + v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k]; + v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k]; + v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k]; + } + + v->TotalNumberOfDCCActiveDPP[i][j] = 0; + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + if (v->DCCEnable[k] == true) { + v->TotalNumberOfDCCActiveDPP[i][j] = v->TotalNumberOfDCCActiveDPP[i][j] + v->NoOfDPP[i][j][k]; + } + } + + for (k = 0; k < v->NumberOfActivePlanes; k++) { + if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 + || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) { + + if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) + && v->SourceScan[k] != dm_vert) { + v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) + / 2; + v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma; + } else { + v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma; + v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma; + } + + v->PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes( + mode_lib, + v->DCCEnable[k], + v->Read256BlockHeightC[k], + v->Read256BlockWidthC[k], + v->SourcePixelFormat[k], + v->SurfaceTiling[k], + v->BytePerPixelC[k], + v->SourceScan[k], + v->SwathWidthCThisState[k], + v->ViewportHeightChroma[k], + v->GPUVMEnable, + v->HostVMEnable, + v->HostVMMaxNonCachedPageTableLevels, + v->GPUVMMinPageSize, + v->HostVMMinPageSize, + v->PTEBufferSizeInRequestsForChroma, + v->PitchC[k], + 0.0, + &v->MacroTileWidthC[k], + &v->MetaRowBytesC, + &v->DPTEBytesPerRowC, + &v->PTEBufferSizeNotExceededC[i][j][k], + &v->dummyinteger7, + &v->dpte_row_height_chroma[k], + &v->dummyinteger28, + &v->dummyinteger26, + &v->dummyinteger23, + &v->meta_row_height_chroma[k], + &v->dummyinteger8, + &v->dummyinteger9, + &v->dummyinteger19, + &v->dummyinteger20, + &v->dummyinteger17, + &v->dummyinteger10, + &v->dummyinteger11); + + v->PrefetchLinesC[i][j][k] = CalculatePrefetchSourceLines( + mode_lib, + v->VRatioChroma[k], + v->VTAPsChroma[k], + v->Interlace[k], + v->ProgressiveToInterlaceUnitInOPP, + v->SwathHeightCThisState[k], + v->ViewportYStartC[k], + &v->PrefillC[k], + &v->MaxNumSwC[k]); + } else { + v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma; + v->PTEBufferSizeInRequestsForChroma = 0; + v->PDEAndMetaPTEBytesPerFrameC = 0.0; + v->MetaRowBytesC = 0.0; + v->DPTEBytesPerRowC = 0.0; + v->PrefetchLinesC[i][j][k] = 0.0; + v->PTEBufferSizeNotExceededC[i][j][k] = true; + } + v->PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes( + mode_lib, + v->DCCEnable[k], + v->Read256BlockHeightY[k], + v->Read256BlockWidthY[k], + v->SourcePixelFormat[k], + v->SurfaceTiling[k], + v->BytePerPixelY[k], + v->SourceScan[k], + v->SwathWidthYThisState[k], + v->ViewportHeight[k], + v->GPUVMEnable, + v->HostVMEnable, + v->HostVMMaxNonCachedPageTableLevels, + v->GPUVMMinPageSize, + v->HostVMMinPageSize, + v->PTEBufferSizeInRequestsForLuma, + v->PitchY[k], + v->DCCMetaPitchY[k], + &v->MacroTileWidthY[k], + &v->MetaRowBytesY, + &v->DPTEBytesPerRowY, + &v->PTEBufferSizeNotExceededY[i][j][k], + &v->dummyinteger7, + &v->dpte_row_height[k], + &v->dummyinteger29, + &v->dummyinteger27, + &v->dummyinteger24, + &v->meta_row_height[k], + &v->dummyinteger25, + &v->dpte_group_bytes[k], + &v->dummyinteger21, + &v->dummyinteger22, + &v->dummyinteger18, + &v->dummyinteger5, + &v->dummyinteger6); + v->PrefetchLinesY[i][j][k] = CalculatePrefetchSourceLines( + mode_lib, + v->VRatio[k], + v->vtaps[k], + v->Interlace[k], + v->ProgressiveToInterlaceUnitInOPP, + v->SwathHeightYThisState[k], + v->ViewportYStartY[k], + &v->PrefillY[k], + &v->MaxNumSwY[k]); + v->PDEAndMetaPTEBytesPerFrame[i][j][k] = v->PDEAndMetaPTEBytesPerFrameY + v->PDEAndMetaPTEBytesPerFrameC; + v->MetaRowBytes[i][j][k] = v->MetaRowBytesY + v->MetaRowBytesC; + v->DPTEBytesPerRow[i][j][k] = v->DPTEBytesPerRowY + v->DPTEBytesPerRowC; + + CalculateRowBandwidth( + v->GPUVMEnable, + v->SourcePixelFormat[k], + v->VRatio[k], + v->VRatioChroma[k], + v->DCCEnable[k], + v->HTotal[k] / v->PixelClock[k], + v->MetaRowBytesY, + v->MetaRowBytesC, + v->meta_row_height[k], + v->meta_row_height_chroma[k], + v->DPTEBytesPerRowY, + v->DPTEBytesPerRowC, + v->dpte_row_height[k], + v->dpte_row_height_chroma[k], + &v->meta_row_bandwidth[i][j][k], + &v->dpte_row_bandwidth[i][j][k]); + } + /*DCCMetaBufferSizeSupport(i, j) = True + For k = 0 To NumberOfActivePlanes - 1 + If MetaRowBytes(i, j, k) > 24064 Then + DCCMetaBufferSizeSupport(i, j) = False + End If + Next k*/ + v->DCCMetaBufferSizeSupport[i][j] = true; + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + if (v->MetaRowBytes[i][j][k] > 24064) + v->DCCMetaBufferSizeSupport[i][j] = false; + } + v->UrgLatency[i] = CalculateUrgentLatency( + v->UrgentLatencyPixelDataOnly, + v->UrgentLatencyPixelMixedWithVMData, + v->UrgentLatencyVMDataOnly, + v->DoUrgentLatencyAdjustment, + v->UrgentLatencyAdjustmentFabricClockComponent, + v->UrgentLatencyAdjustmentFabricClockReference, + v->FabricClockPerState[i]); + + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + CalculateUrgentBurstFactor( + v->swath_width_luma_ub_this_state[k], + v->swath_width_chroma_ub_this_state[k], + v->SwathHeightYThisState[k], + v->SwathHeightCThisState[k], + v->HTotal[k] / v->PixelClock[k], + v->UrgLatency[i], + v->CursorBufferSize, + v->CursorWidth[k][0], + v->CursorBPP[k][0], + v->VRatio[k], + v->VRatioChroma[k], + v->BytePerPixelInDETY[k], + v->BytePerPixelInDETC[k], + v->DETBufferSizeYThisState[k], + v->DETBufferSizeCThisState[k], + &v->UrgentBurstFactorCursor[k], + &v->UrgentBurstFactorLuma[k], + &v->UrgentBurstFactorChroma[k], + &NotUrgentLatencyHiding[k]); + } + + v->NotEnoughUrgentLatencyHidingA[i][j] = false; + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + if (NotUrgentLatencyHiding[k]) { + v->NotEnoughUrgentLatencyHidingA[i][j] = true; + } + } + + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + v->VActivePixelBandwidth[i][j][k] = v->ReadBandwidthLuma[k] * v->UrgentBurstFactorLuma[k] + + v->ReadBandwidthChroma[k] * v->UrgentBurstFactorChroma[k]; + v->VActiveCursorBandwidth[i][j][k] = v->cursor_bw[k] * v->UrgentBurstFactorCursor[k]; + } + + v->TotalVActivePixelBandwidth[i][j] = 0; + v->TotalVActiveCursorBandwidth[i][j] = 0; + v->TotalMetaRowBandwidth[i][j] = 0; + v->TotalDPTERowBandwidth[i][j] = 0; + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + v->TotalVActivePixelBandwidth[i][j] = v->TotalVActivePixelBandwidth[i][j] + v->VActivePixelBandwidth[i][j][k]; + v->TotalVActiveCursorBandwidth[i][j] = v->TotalVActiveCursorBandwidth[i][j] + v->VActiveCursorBandwidth[i][j][k]; + v->TotalMetaRowBandwidth[i][j] = v->TotalMetaRowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->meta_row_bandwidth[i][j][k]; + v->TotalDPTERowBandwidth[i][j] = v->TotalDPTERowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->dpte_row_bandwidth[i][j][k]; + } + } + } + + //Calculate Return BW + for (i = 0; i < v->soc.num_states; ++i) { + for (j = 0; j <= 1; ++j) { + for (k = 0; k < v->NumberOfActivePlanes; k++) { + if (v->BlendingAndTiming[k] == k) { + if (v->WritebackEnable[k] == true) { + v->WritebackDelayTime[k] = v->WritebackLatency + + CalculateWriteBackDelay( + v->WritebackPixelFormat[k], + v->WritebackHRatio[k], + v->WritebackVRatio[k], + v->WritebackVTaps[k], + v->WritebackDestinationWidth[k], + v->WritebackDestinationHeight[k], + v->WritebackSourceHeight[k], + v->HTotal[k]) / v->RequiredDISPCLK[i][j]; + } else { + v->WritebackDelayTime[k] = 0.0; + } + for (m = 0; m < v->NumberOfActivePlanes; m++) { + if (v->BlendingAndTiming[m] == k && v->WritebackEnable[m] == true) { + v->WritebackDelayTime[k] = dml_max( + v->WritebackDelayTime[k], + v->WritebackLatency + + CalculateWriteBackDelay( + v->WritebackPixelFormat[m], + v->WritebackHRatio[m], + v->WritebackVRatio[m], + v->WritebackVTaps[m], + v->WritebackDestinationWidth[m], + v->WritebackDestinationHeight[m], + v->WritebackSourceHeight[m], + v->HTotal[m]) / v->RequiredDISPCLK[i][j]); + } + } + } + } + for (k = 0; k < v->NumberOfActivePlanes; k++) { + for (m = 0; m < v->NumberOfActivePlanes; m++) { + if (v->BlendingAndTiming[k] == m) { + v->WritebackDelayTime[k] = v->WritebackDelayTime[m]; + } + } + } + v->MaxMaxVStartup[i][j] = 0; + for (k = 0; k < v->NumberOfActivePlanes; k++) { + v->MaximumVStartup[i][j][k] = + (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) ? + dml_floor((v->VTotal[k] - v->VActive[k]) / 2.0, 1.0) : + v->VTotal[k] - v->VActive[k] + - dml_max( + 1.0, + dml_ceil( + 1.0 * v->WritebackDelayTime[k] + / (v->HTotal[k] + / v->PixelClock[k]), + 1.0)); + if (v->MaximumVStartup[i][j][k] > 1023) + v->MaximumVStartup[i][j][k] = 1023; + v->MaxMaxVStartup[i][j] = dml_max(v->MaxMaxVStartup[i][j], v->MaximumVStartup[i][j][k]); + } + } + } + + ReorderingBytes = v->NumberOfChannels + * dml_max3( + v->UrgentOutOfOrderReturnPerChannelPixelDataOnly, + v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData, + v->UrgentOutOfOrderReturnPerChannelVMDataOnly); + + for (i = 0; i < v->soc.num_states; ++i) { + for (j = 0; j <= 1; ++j) { + v->DCFCLKState[i][j] = v->DCFCLKPerState[i]; + } + } + + if (v->UseMinimumRequiredDCFCLK == true) + UseMinimumDCFCLK(mode_lib, MaxPrefetchMode, ReorderingBytes); + + for (i = 0; i < v->soc.num_states; ++i) { + for (j = 0; j <= 1; ++j) { + double IdealFabricAndSDPPortBandwidthPerState = dml_min( + v->ReturnBusWidth * v->DCFCLKState[i][j], + v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn); + double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth; + double PixelDataOnlyReturnBWPerState = dml_min( + IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, + IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0); + double PixelMixedWithVMDataReturnBWPerState = dml_min( + IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, + IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0); + + if (v->HostVMEnable != true) { + v->ReturnBWPerState[i][j] = PixelDataOnlyReturnBWPerState; + } else { + v->ReturnBWPerState[i][j] = PixelMixedWithVMDataReturnBWPerState; + } + } + } + + //Re-ordering Buffer Support Check + for (i = 0; i < v->soc.num_states; ++i) { + for (j = 0; j <= 1; ++j) { + if ((v->ROBBufferSizeInKByte - v->PixelChunkSizeInKByte) * 1024 / v->ReturnBWPerState[i][j] + > (v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / v->DCFCLKState[i][j] + ReorderingBytes / v->ReturnBWPerState[i][j]) { + v->ROBSupport[i][j] = true; + } else { + v->ROBSupport[i][j] = false; + } + } + } + + //Vertical Active BW support check + + MaxTotalVActiveRDBandwidth = 0; + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]; + } + + for (i = 0; i < v->soc.num_states; ++i) { + for (j = 0; j <= 1; ++j) { + v->MaxTotalVerticalActiveAvailableBandwidth[i][j] = dml_min( + dml_min( + v->ReturnBusWidth * v->DCFCLKState[i][j], + v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn) + * v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100, + v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth + * v->MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation / 100); + + if (MaxTotalVActiveRDBandwidth <= v->MaxTotalVerticalActiveAvailableBandwidth[i][j]) { + v->TotalVerticalActiveBandwidthSupport[i][j] = true; + } else { + v->TotalVerticalActiveBandwidthSupport[i][j] = false; + } + } + } + + v->UrgentLatency = CalculateUrgentLatency( + v->UrgentLatencyPixelDataOnly, + v->UrgentLatencyPixelMixedWithVMData, + v->UrgentLatencyVMDataOnly, + v->DoUrgentLatencyAdjustment, + v->UrgentLatencyAdjustmentFabricClockComponent, + v->UrgentLatencyAdjustmentFabricClockReference, + v->FabricClock); + //Prefetch Check + for (i = 0; i < v->soc.num_states; ++i) { + for (j = 0; j <= 1; ++j) { + double VMDataOnlyReturnBWPerState; + double HostVMInefficiencyFactor = 1; + int NextPrefetchModeState = MinPrefetchMode; + bool UnboundedRequestEnabledThisState = false; + int CompressedBufferSizeInkByteThisState = 0; + double dummy; + + v->TimeCalc = 24 / v->ProjectedDCFCLKDeepSleep[i][j]; + + v->BandwidthWithoutPrefetchSupported[i][j] = true; + if (v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j] + + v->TotalDPTERowBandwidth[i][j] > v->ReturnBWPerState[i][j] || v->NotEnoughUrgentLatencyHidingA[i][j]) { + v->BandwidthWithoutPrefetchSupported[i][j] = false; + } + + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k]; + v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k]; + v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k]; + v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k]; + v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k]; + v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k]; + v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k]; + v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k]; + v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k]; + } + + VMDataOnlyReturnBWPerState = dml_min( + dml_min( + v->ReturnBusWidth * v->DCFCLKState[i][j], + v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn) + * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, + v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth + * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0); + if (v->GPUVMEnable && v->HostVMEnable) + HostVMInefficiencyFactor = v->ReturnBWPerState[i][j] / VMDataOnlyReturnBWPerState; + + v->ExtraLatency = CalculateExtraLatency( + v->RoundTripPingLatencyCycles, + ReorderingBytes, + v->DCFCLKState[i][j], + v->TotalNumberOfActiveDPP[i][j], + v->PixelChunkSizeInKByte, + v->TotalNumberOfDCCActiveDPP[i][j], + v->MetaChunkSize, + v->ReturnBWPerState[i][j], + v->GPUVMEnable, + v->HostVMEnable, + v->NumberOfActivePlanes, + v->NoOfDPPThisState, + v->dpte_group_bytes, + HostVMInefficiencyFactor, + v->HostVMMinPageSize, + v->HostVMMaxNonCachedPageTableLevels); + + v->NextMaxVStartup = v->MaxMaxVStartup[i][j]; + do { + v->PrefetchModePerState[i][j] = NextPrefetchModeState; + v->MaxVStartup = v->NextMaxVStartup; + + v->TWait = CalculateTWait( + v->PrefetchModePerState[i][j], + v->DRAMClockChangeLatency, + v->UrgLatency[i], + v->SREnterPlusExitTime); + + for (k = 0; k < v->NumberOfActivePlanes; k++) { + CalculatePrefetchSchedulePerPlane(mode_lib, + HostVMInefficiencyFactor, + i, j, k); + } + + for (k = 0; k < v->NumberOfActivePlanes; k++) { + CalculateUrgentBurstFactor( + v->swath_width_luma_ub_this_state[k], + v->swath_width_chroma_ub_this_state[k], + v->SwathHeightYThisState[k], + v->SwathHeightCThisState[k], + v->HTotal[k] / v->PixelClock[k], + v->UrgentLatency, + v->CursorBufferSize, + v->CursorWidth[k][0], + v->CursorBPP[k][0], + v->VRatioPreY[i][j][k], + v->VRatioPreC[i][j][k], + v->BytePerPixelInDETY[k], + v->BytePerPixelInDETC[k], + v->DETBufferSizeYThisState[k], + v->DETBufferSizeCThisState[k], + &v->UrgentBurstFactorCursorPre[k], + &v->UrgentBurstFactorLumaPre[k], + &v->UrgentBurstFactorChromaPre[k], + &v->NotUrgentLatencyHidingPre[k]); + } + + v->MaximumReadBandwidthWithPrefetch = 0.0; + for (k = 0; k < v->NumberOfActivePlanes; k++) { + v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 + / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPreY[i][j][k]; + + v->MaximumReadBandwidthWithPrefetch = + v->MaximumReadBandwidthWithPrefetch + + dml_max3( + v->VActivePixelBandwidth[i][j][k] + + v->VActiveCursorBandwidth[i][j][k] + + v->NoOfDPP[i][j][k] + * (v->meta_row_bandwidth[i][j][k] + + v->dpte_row_bandwidth[i][j][k]), + v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k], + v->NoOfDPP[i][j][k] + * (v->RequiredPrefetchPixelDataBWLuma[i][j][k] + * v->UrgentBurstFactorLumaPre[k] + + v->RequiredPrefetchPixelDataBWChroma[i][j][k] + * v->UrgentBurstFactorChromaPre[k]) + + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]); + } + + v->NotEnoughUrgentLatencyHidingPre = false; + for (k = 0; k < v->NumberOfActivePlanes; k++) { + if (v->NotUrgentLatencyHidingPre[k] == true) { + v->NotEnoughUrgentLatencyHidingPre = true; + } + } + + v->PrefetchSupported[i][j] = true; + if (v->BandwidthWithoutPrefetchSupported[i][j] == false || v->MaximumReadBandwidthWithPrefetch > v->ReturnBWPerState[i][j] + || v->NotEnoughUrgentLatencyHidingPre == 1) { + v->PrefetchSupported[i][j] = false; + } + for (k = 0; k < v->NumberOfActivePlanes; k++) { + if (v->LineTimesForPrefetch[k] < 2.0 || v->LinesForMetaPTE[k] >= 32.0 || v->LinesForMetaAndDPTERow[k] >= 16.0 + || v->NoTimeForPrefetch[i][j][k] == true) { + v->PrefetchSupported[i][j] = false; + } + } + + v->DynamicMetadataSupported[i][j] = true; + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + if (v->NoTimeForDynamicMetadata[i][j][k] == true) { + v->DynamicMetadataSupported[i][j] = false; + } + } + + v->VRatioInPrefetchSupported[i][j] = true; + for (k = 0; k < v->NumberOfActivePlanes; k++) { + if (v->VRatioPreY[i][j][k] > 4.0 || v->VRatioPreC[i][j][k] > 4.0 || v->NoTimeForPrefetch[i][j][k] == true) { + v->VRatioInPrefetchSupported[i][j] = false; + } + } + v->AnyLinesForVMOrRowTooLarge = false; + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + if (v->LinesForMetaAndDPTERow[k] >= 16 || v->LinesForMetaPTE[k] >= 32) { + v->AnyLinesForVMOrRowTooLarge = true; + } + } + + v->NextPrefetchMode = v->NextPrefetchMode + 1; + + if (v->PrefetchSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true) { + v->BandwidthAvailableForImmediateFlip = v->ReturnBWPerState[i][j]; + for (k = 0; k < v->NumberOfActivePlanes; k++) { + v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip + - dml_max( + v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k], + v->NoOfDPP[i][j][k] + * (v->RequiredPrefetchPixelDataBWLuma[i][j][k] + * v->UrgentBurstFactorLumaPre[k] + + v->RequiredPrefetchPixelDataBWChroma[i][j][k] + * v->UrgentBurstFactorChromaPre[k]) + + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]); + } + v->TotImmediateFlipBytes = 0.0; + for (k = 0; k < v->NumberOfActivePlanes; k++) { + v->TotImmediateFlipBytes = v->TotImmediateFlipBytes + + v->NoOfDPP[i][j][k] * v->PDEAndMetaPTEBytesPerFrame[i][j][k] + v->MetaRowBytes[i][j][k] + + v->DPTEBytesPerRow[i][j][k]; + } + + for (k = 0; k < v->NumberOfActivePlanes; k++) { + CalculateFlipSchedule( + mode_lib, + k, + HostVMInefficiencyFactor, + v->ExtraLatency, + v->UrgLatency[i], + v->PDEAndMetaPTEBytesPerFrame[i][j][k], + v->MetaRowBytes[i][j][k], + v->DPTEBytesPerRow[i][j][k]); + } + v->total_dcn_read_bw_with_flip = 0.0; + for (k = 0; k < v->NumberOfActivePlanes; k++) { + v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip + + dml_max3( + v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k], + v->NoOfDPP[i][j][k] * v->final_flip_bw[k] + v->VActivePixelBandwidth[i][j][k] + + v->VActiveCursorBandwidth[i][j][k], + v->NoOfDPP[i][j][k] + * (v->final_flip_bw[k] + + v->RequiredPrefetchPixelDataBWLuma[i][j][k] + * v->UrgentBurstFactorLumaPre[k] + + v->RequiredPrefetchPixelDataBWChroma[i][j][k] + * v->UrgentBurstFactorChromaPre[k]) + + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]); + } + v->ImmediateFlipSupportedForState[i][j] = true; + if (v->total_dcn_read_bw_with_flip > v->ReturnBWPerState[i][j]) { + v->ImmediateFlipSupportedForState[i][j] = false; + } + for (k = 0; k < v->NumberOfActivePlanes; k++) { + if (v->ImmediateFlipSupportedForPipe[k] == false) { + v->ImmediateFlipSupportedForState[i][j] = false; + } + } + } else { + v->ImmediateFlipSupportedForState[i][j] = false; + } + + if (v->MaxVStartup <= __DML_VBA_MIN_VSTARTUP__ || v->AnyLinesForVMOrRowTooLarge == false) { + v->NextMaxVStartup = v->MaxMaxVStartup[i][j]; + NextPrefetchModeState = NextPrefetchModeState + 1; + } else { + v->NextMaxVStartup = v->NextMaxVStartup - 1; + } + v->NextPrefetchMode = v->NextPrefetchMode + 1; + } while (!((v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true + && ((v->HostVMEnable == false && + v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) + || v->ImmediateFlipSupportedForState[i][j] == true)) + || (v->NextMaxVStartup == v->MaxMaxVStartup[i][j] && NextPrefetchModeState > MaxPrefetchMode))); + + CalculateUnboundedRequestAndCompressedBufferSize( + v->DETBufferSizeInKByte[0], + v->ConfigReturnBufferSizeInKByte, + v->UseUnboundedRequesting, + v->TotalNumberOfActiveDPP[i][j], + NoChroma, + v->MaxNumDPP, + v->CompressedBufferSegmentSizeInkByte, + v->Output, + &UnboundedRequestEnabledThisState, + &CompressedBufferSizeInkByteThisState); + + CalculateWatermarksAndDRAMSpeedChangeSupport( + mode_lib, + v->PrefetchModePerState[i][j], + v->DCFCLKState[i][j], + v->ReturnBWPerState[i][j], + v->UrgLatency[i], + v->ExtraLatency, + v->SOCCLKPerState[i], + v->ProjectedDCFCLKDeepSleep[i][j], + v->DETBufferSizeYThisState, + v->DETBufferSizeCThisState, + v->SwathHeightYThisState, + v->SwathHeightCThisState, + v->SwathWidthYThisState, + v->SwathWidthCThisState, + v->NoOfDPPThisState, + v->BytePerPixelInDETY, + v->BytePerPixelInDETC, + UnboundedRequestEnabledThisState, + CompressedBufferSizeInkByteThisState, + &v->DRAMClockChangeSupport[i][j], + &dummy, + &dummy, + &dummy, + &dummy); + } + } + + /*PTE Buffer Size Check*/ + for (i = 0; i < v->soc.num_states; i++) { + for (j = 0; j < 2; j++) { + v->PTEBufferSizeNotExceeded[i][j] = true; + for (k = 0; k < v->NumberOfActivePlanes; k++) { + if (v->PTEBufferSizeNotExceededY[i][j][k] == false || v->PTEBufferSizeNotExceededC[i][j][k] == false) { + v->PTEBufferSizeNotExceeded[i][j] = false; + } + } + } + } + + /*Cursor Support Check*/ + v->CursorSupport = true; + for (k = 0; k < v->NumberOfActivePlanes; k++) { + if (v->CursorWidth[k][0] > 0.0) { + if (v->CursorBPP[k][0] == 64 && v->Cursor64BppSupport == false) { + v->CursorSupport = false; + } + } + } + + /*Valid Pitch Check*/ + v->PitchSupport = true; + for (k = 0; k < v->NumberOfActivePlanes; k++) { + v->AlignedYPitch[k] = dml_ceil(dml_max(v->PitchY[k], v->SurfaceWidthY[k]), v->MacroTileWidthY[k]); + if (v->DCCEnable[k] == true) { + v->AlignedDCCMetaPitchY[k] = dml_ceil(dml_max(v->DCCMetaPitchY[k], v->SurfaceWidthY[k]), 64.0 * v->Read256BlockWidthY[k]); + } else { + v->AlignedDCCMetaPitchY[k] = v->DCCMetaPitchY[k]; + } + if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16 + && v->SourcePixelFormat[k] != dm_mono_16 && v->SourcePixelFormat[k] != dm_rgbe + && v->SourcePixelFormat[k] != dm_mono_8) { + v->AlignedCPitch[k] = dml_ceil(dml_max(v->PitchC[k], v->SurfaceWidthC[k]), v->MacroTileWidthC[k]); + if (v->DCCEnable[k] == true) { + v->AlignedDCCMetaPitchC[k] = dml_ceil( + dml_max(v->DCCMetaPitchC[k], v->SurfaceWidthC[k]), + 64.0 * v->Read256BlockWidthC[k]); + } else { + v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k]; + } + } else { + v->AlignedCPitch[k] = v->PitchC[k]; + v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k]; + } + if (v->AlignedYPitch[k] > v->PitchY[k] || v->AlignedCPitch[k] > v->PitchC[k] + || v->AlignedDCCMetaPitchY[k] > v->DCCMetaPitchY[k] || v->AlignedDCCMetaPitchC[k] > v->DCCMetaPitchC[k]) { + v->PitchSupport = false; + } + } + + for (k = 0; k < v->NumberOfActivePlanes; k++) { + if (v->ViewportWidth[k] > v->SurfaceWidthY[k] || v->ViewportHeight[k] > v->SurfaceHeightY[k]) { + ViewportExceedsSurface = true; + if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 + && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_444_8 + && v->SourcePixelFormat[k] != dm_rgbe) { + if (v->ViewportWidthChroma[k] > v->SurfaceWidthC[k] + || v->ViewportHeightChroma[k] > v->SurfaceHeightC[k]) { + ViewportExceedsSurface = true; + } + } + } + } + + /*Mode Support, Voltage State and SOC Configuration*/ + for (i = v->soc.num_states - 1; i >= 0; i--) { + for (j = 0; j < 2; j++) { + if (v->ScaleRatioAndTapsSupport == true && v->SourceFormatPixelAndScanSupport == true && v->ViewportSizeSupport[i][j] == true + && v->LinkCapacitySupport[i] == true && !P2IWith420 && !DSCOnlyIfNecessaryWithBPP + && !DSC422NativeNotSupported && v->ODMCombine4To1SupportCheckOK[i] == true && v->NotEnoughDSCUnits[i] == false + && v->DTBCLKRequiredMoreThanSupported[i] == false + && v->ROBSupport[i][j] == true && v->DISPCLK_DPPCLK_Support[i][j] == true + && v->TotalAvailablePipesSupport[i][j] == true && EnoughWritebackUnits == true + && v->WritebackLatencySupport == true && v->WritebackScaleRatioAndTapsSupport == true + && v->CursorSupport == true && v->PitchSupport == true && ViewportExceedsSurface == false + && v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true + && v->TotalVerticalActiveBandwidthSupport[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true + && v->PTEBufferSizeNotExceeded[i][j] == true && v->NonsupportedDSCInputBPC == false + && ((v->HostVMEnable == false + && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) + || v->ImmediateFlipSupportedForState[i][j] == true) + && FMTBufferExceeded == false) { + v->ModeSupport[i][j] = true; + } else { + v->ModeSupport[i][j] = false; + } + } + } + + { + unsigned int MaximumMPCCombine = 0; + for (i = v->soc.num_states; i >= 0; i--) { + if (i == v->soc.num_states || v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true) { + v->VoltageLevel = i; + v->ModeIsSupported = v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true; + if (v->ModeSupport[i][0] == true) { + MaximumMPCCombine = 0; + } else { + MaximumMPCCombine = 1; + } + } + } + v->ImmediateFlipSupport = v->ImmediateFlipSupportedForState[v->VoltageLevel][MaximumMPCCombine]; + for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) { + v->MPCCombineEnable[k] = v->MPCCombine[v->VoltageLevel][MaximumMPCCombine][k]; + v->DPPPerPlane[k] = v->NoOfDPP[v->VoltageLevel][MaximumMPCCombine][k]; + } + v->DCFCLK = v->DCFCLKState[v->VoltageLevel][MaximumMPCCombine]; + v->DRAMSpeed = v->DRAMSpeedPerState[v->VoltageLevel]; + v->FabricClock = v->FabricClockPerState[v->VoltageLevel]; + v->SOCCLK = v->SOCCLKPerState[v->VoltageLevel]; + v->ReturnBW = v->ReturnBWPerState[v->VoltageLevel][MaximumMPCCombine]; + v->maxMpcComb = MaximumMPCCombine; + } +} + +static void CalculateWatermarksAndDRAMSpeedChangeSupport( + struct display_mode_lib *mode_lib, + unsigned int PrefetchMode, + double DCFCLK, + double ReturnBW, + double UrgentLatency, + double ExtraLatency, + double SOCCLK, + double DCFCLKDeepSleep, + unsigned int DETBufferSizeY[], + unsigned int DETBufferSizeC[], + unsigned int SwathHeightY[], + unsigned int SwathHeightC[], + double SwathWidthY[], + double SwathWidthC[], + unsigned int DPPPerPlane[], + double BytePerPixelDETY[], + double BytePerPixelDETC[], + bool UnboundedRequestEnabled, + int unsigned CompressedBufferSizeInkByte, + enum clock_change_support *DRAMClockChangeSupport, + double *StutterExitWatermark, + double *StutterEnterPlusExitWatermark, + double *Z8StutterExitWatermark, + double *Z8StutterEnterPlusExitWatermark) +{ + struct vba_vars_st *v = &mode_lib->vba; + double EffectiveLBLatencyHidingY; + double EffectiveLBLatencyHidingC; + double LinesInDETY[DC__NUM_DPP__MAX]; + double LinesInDETC; + unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX]; + unsigned int LinesInDETCRoundedDownToSwath; + double FullDETBufferingTimeY; + double FullDETBufferingTimeC; + double ActiveDRAMClockChangeLatencyMarginY; + double ActiveDRAMClockChangeLatencyMarginC; + double WritebackDRAMClockChangeLatencyMargin; + double PlaneWithMinActiveDRAMClockChangeMargin; + double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank; + double WritebackDRAMClockChangeLatencyHiding; + double TotalPixelBW = 0.0; + int k, j; + + v->UrgentWatermark = UrgentLatency + ExtraLatency; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency); + dml_print("DML::%s: ExtraLatency = %f\n", __func__, ExtraLatency); + dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->UrgentWatermark); +#endif + + v->DRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->UrgentWatermark; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: v->DRAMClockChangeLatency = %f\n", __func__, v->DRAMClockChangeLatency); + dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->DRAMClockChangeWatermark); +#endif + + v->TotalActiveWriteback = 0; + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + if (v->WritebackEnable[k] == true) { + v->TotalActiveWriteback = v->TotalActiveWriteback + 1; + } + } + + if (v->TotalActiveWriteback <= 1) { + v->WritebackUrgentWatermark = v->WritebackLatency; + } else { + v->WritebackUrgentWatermark = v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; + } + + if (v->TotalActiveWriteback <= 1) { + v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency; + } else { + v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; + } + + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + TotalPixelBW = TotalPixelBW + + DPPPerPlane[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] + SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k]) + / (v->HTotal[k] / v->PixelClock[k]); + } + + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + double EffectiveDETBufferSizeY = DETBufferSizeY[k]; + + v->LBLatencyHidingSourceLinesY = dml_min( + (double) v->MaxLineBufferLines, + dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1); + + v->LBLatencyHidingSourceLinesC = dml_min( + (double) v->MaxLineBufferLines, + dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1); + + EffectiveLBLatencyHidingY = v->LBLatencyHidingSourceLinesY / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]); + + EffectiveLBLatencyHidingC = v->LBLatencyHidingSourceLinesC / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]); + + if (UnboundedRequestEnabled) { + EffectiveDETBufferSizeY = EffectiveDETBufferSizeY + + CompressedBufferSizeInkByte * 1024 * SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] / (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW; + } + + LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k]; + LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]); + FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k]; + if (BytePerPixelDETC[k] > 0) { + LinesInDETC = v->DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k]; + LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]); + FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (v->HTotal[k] / v->PixelClock[k]) / v->VRatioChroma[k]; + } else { + LinesInDETC = 0; + FullDETBufferingTimeC = 999999; + } + + ActiveDRAMClockChangeLatencyMarginY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY + - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark; + + if (v->NumberOfActivePlanes > 1) { + ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY + - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightY[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatio[k]; + } + + if (BytePerPixelDETC[k] > 0) { + ActiveDRAMClockChangeLatencyMarginC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC + - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark; + + if (v->NumberOfActivePlanes > 1) { + ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC + - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightC[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatioChroma[k]; + } + v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMarginY, ActiveDRAMClockChangeLatencyMarginC); + } else { + v->ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY; + } + + if (v->WritebackEnable[k] == true) { + WritebackDRAMClockChangeLatencyHiding = v->WritebackInterfaceBufferSize * 1024 + / (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4); + if (v->WritebackPixelFormat[k] == dm_444_64) { + WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding / 2; + } + WritebackDRAMClockChangeLatencyMargin = WritebackDRAMClockChangeLatencyHiding - v->WritebackDRAMClockChangeWatermark; + v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(v->ActiveDRAMClockChangeLatencyMargin[k], WritebackDRAMClockChangeLatencyMargin); + } + } + + v->MinActiveDRAMClockChangeMargin = 999999; + PlaneWithMinActiveDRAMClockChangeMargin = 0; + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + if (v->ActiveDRAMClockChangeLatencyMargin[k] < v->MinActiveDRAMClockChangeMargin) { + v->MinActiveDRAMClockChangeMargin = v->ActiveDRAMClockChangeLatencyMargin[k]; + if (v->BlendingAndTiming[k] == k) { + PlaneWithMinActiveDRAMClockChangeMargin = k; + } else { + for (j = 0; j < v->NumberOfActivePlanes; ++j) { + if (v->BlendingAndTiming[k] == j) { + PlaneWithMinActiveDRAMClockChangeMargin = j; + } + } + } + } + } + + v->MinActiveDRAMClockChangeLatencySupported = v->MinActiveDRAMClockChangeMargin + v->DRAMClockChangeLatency ; + + SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999; + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (v->BlendingAndTiming[k] == k)) && !(v->BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin) + && v->ActiveDRAMClockChangeLatencyMargin[k] < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) { + SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = v->ActiveDRAMClockChangeLatencyMargin[k]; + } + } + + v->TotalNumberOfActiveOTG = 0; + + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + if (v->BlendingAndTiming[k] == k) { + v->TotalNumberOfActiveOTG = v->TotalNumberOfActiveOTG + 1; + } + } + + if (v->MinActiveDRAMClockChangeMargin > 0 && PrefetchMode == 0) { + *DRAMClockChangeSupport = dm_dram_clock_change_vactive; + } else if ((v->SynchronizedVBlank == true || v->TotalNumberOfActiveOTG == 1 + || SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && PrefetchMode == 0) { + *DRAMClockChangeSupport = dm_dram_clock_change_vblank; + } else { + *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; + } + + *StutterExitWatermark = v->SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep; + *StutterEnterPlusExitWatermark = (v->SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep); + *Z8StutterExitWatermark = v->SRExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep; + *Z8StutterEnterPlusExitWatermark = v->SREnterPlusExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, *StutterExitWatermark); + dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, *StutterEnterPlusExitWatermark); + dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, *Z8StutterExitWatermark); + dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, *Z8StutterEnterPlusExitWatermark); +#endif +} + +static void CalculateDCFCLKDeepSleep( + struct display_mode_lib *mode_lib, + unsigned int NumberOfActivePlanes, + int BytePerPixelY[], + int BytePerPixelC[], + double VRatio[], + double VRatioChroma[], + double SwathWidthY[], + double SwathWidthC[], + unsigned int DPPPerPlane[], + double HRatio[], + double HRatioChroma[], + double PixelClock[], + double PSCL_THROUGHPUT[], + double PSCL_THROUGHPUT_CHROMA[], + double DPPCLK[], + double ReadBandwidthLuma[], + double ReadBandwidthChroma[], + int ReturnBusWidth, + double *DCFCLKDeepSleep) +{ + struct vba_vars_st *v = &mode_lib->vba; + double DisplayPipeLineDeliveryTimeLuma; + double DisplayPipeLineDeliveryTimeChroma; + double ReadBandwidth = 0.0; + int k; + + for (k = 0; k < NumberOfActivePlanes; ++k) { + + if (VRatio[k] <= 1) { + DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k]; + } else { + DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / DPPCLK[k]; + } + if (BytePerPixelC[k] == 0) { + DisplayPipeLineDeliveryTimeChroma = 0; + } else { + if (VRatioChroma[k] <= 1) { + DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k]; + } else { + DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k]; + } + } + + if (BytePerPixelC[k] > 0) { + v->DCFCLKDeepSleepPerPlane[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma, + __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma); + } else { + v->DCFCLKDeepSleepPerPlane[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma; + } + v->DCFCLKDeepSleepPerPlane[k] = dml_max(v->DCFCLKDeepSleepPerPlane[k], PixelClock[k] / 16); + + } + + for (k = 0; k < NumberOfActivePlanes; ++k) { + ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k]; + } + + *DCFCLKDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / ReturnBusWidth); + + for (k = 0; k < NumberOfActivePlanes; ++k) { + *DCFCLKDeepSleep = dml_max(*DCFCLKDeepSleep, v->DCFCLKDeepSleepPerPlane[k]); + } +} + +static void CalculateUrgentBurstFactor( + int swath_width_luma_ub, + int swath_width_chroma_ub, + unsigned int SwathHeightY, + unsigned int SwathHeightC, + double LineTime, + double UrgentLatency, + double CursorBufferSize, + unsigned int CursorWidth, + unsigned int CursorBPP, + double VRatio, + double VRatioC, + double BytePerPixelInDETY, + double BytePerPixelInDETC, + double DETBufferSizeY, + double DETBufferSizeC, + double *UrgentBurstFactorCursor, + double *UrgentBurstFactorLuma, + double *UrgentBurstFactorChroma, + bool *NotEnoughUrgentLatencyHiding) +{ + double LinesInDETLuma; + double LinesInDETChroma; + unsigned int LinesInCursorBuffer; + double CursorBufferSizeInTime; + double DETBufferSizeInTimeLuma; + double DETBufferSizeInTimeChroma; + + *NotEnoughUrgentLatencyHiding = 0; + + if (CursorWidth > 0) { + LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0); + if (VRatio > 0) { + CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio; + if (CursorBufferSizeInTime - UrgentLatency <= 0) { + *NotEnoughUrgentLatencyHiding = 1; + *UrgentBurstFactorCursor = 0; + } else { + *UrgentBurstFactorCursor = CursorBufferSizeInTime / (CursorBufferSizeInTime - UrgentLatency); + } + } else { + *UrgentBurstFactorCursor = 1; + } + } + + LinesInDETLuma = DETBufferSizeY / BytePerPixelInDETY / swath_width_luma_ub; + if (VRatio > 0) { + DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio; + if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) { + *NotEnoughUrgentLatencyHiding = 1; + *UrgentBurstFactorLuma = 0; + } else { + *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency); + } + } else { + *UrgentBurstFactorLuma = 1; + } + + if (BytePerPixelInDETC > 0) { + LinesInDETChroma = DETBufferSizeC / BytePerPixelInDETC / swath_width_chroma_ub; + if (VRatio > 0) { + DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatio; + if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) { + *NotEnoughUrgentLatencyHiding = 1; + *UrgentBurstFactorChroma = 0; + } else { + *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma / (DETBufferSizeInTimeChroma - UrgentLatency); + } + } else { + *UrgentBurstFactorChroma = 1; + } + } +} + +static void CalculatePixelDeliveryTimes( + unsigned int NumberOfActivePlanes, + double VRatio[], + double VRatioChroma[], + double VRatioPrefetchY[], + double VRatioPrefetchC[], + unsigned int swath_width_luma_ub[], + unsigned int swath_width_chroma_ub[], + unsigned int DPPPerPlane[], + double HRatio[], + double HRatioChroma[], + double PixelClock[], + double PSCL_THROUGHPUT[], + double PSCL_THROUGHPUT_CHROMA[], + double DPPCLK[], + int BytePerPixelC[], + enum scan_direction_class SourceScan[], + unsigned int NumberOfCursors[], + unsigned int CursorWidth[][DC__NUM_CURSOR__MAX], + unsigned int CursorBPP[][DC__NUM_CURSOR__MAX], + unsigned int BlockWidth256BytesY[], + unsigned int BlockHeight256BytesY[], + unsigned int BlockWidth256BytesC[], + unsigned int BlockHeight256BytesC[], + double DisplayPipeLineDeliveryTimeLuma[], + double DisplayPipeLineDeliveryTimeChroma[], + double DisplayPipeLineDeliveryTimeLumaPrefetch[], + double DisplayPipeLineDeliveryTimeChromaPrefetch[], + double DisplayPipeRequestDeliveryTimeLuma[], + double DisplayPipeRequestDeliveryTimeChroma[], + double DisplayPipeRequestDeliveryTimeLumaPrefetch[], + double DisplayPipeRequestDeliveryTimeChromaPrefetch[], + double CursorRequestDeliveryTime[], + double CursorRequestDeliveryTimePrefetch[]) +{ + double req_per_swath_ub; + int k; + + for (k = 0; k < NumberOfActivePlanes; ++k) { + if (VRatio[k] <= 1) { + DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k]; + } else { + DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k]; + } + + if (BytePerPixelC[k] == 0) { + DisplayPipeLineDeliveryTimeChroma[k] = 0; + } else { + if (VRatioChroma[k] <= 1) { + DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k]; + } else { + DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k]; + } + } + + if (VRatioPrefetchY[k] <= 1) { + DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k]; + } else { + DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k]; + } + + if (BytePerPixelC[k] == 0) { + DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0; + } else { + if (VRatioPrefetchC[k] <= 1) { + DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k]; + } else { + DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k]; + } + } + } + + for (k = 0; k < NumberOfActivePlanes; ++k) { + if (SourceScan[k] != dm_vert) { + req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k]; + } else { + req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k]; + } + DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub; + DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub; + if (BytePerPixelC[k] == 0) { + DisplayPipeRequestDeliveryTimeChroma[k] = 0; + DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0; + } else { + if (SourceScan[k] != dm_vert) { + req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k]; + } else { + req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k]; + } + DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub; + DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub; + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]); + dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]); + dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]); + dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]); + dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]); + dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]); + dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]); + dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]); + dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]); + dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]); + dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]); + dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]); +#endif + } + + for (k = 0; k < NumberOfActivePlanes; ++k) { + int cursor_req_per_width; + cursor_req_per_width = dml_ceil(CursorWidth[k][0] * CursorBPP[k][0] / 256 / 8, 1); + if (NumberOfCursors[k] > 0) { + if (VRatio[k] <= 1) { + CursorRequestDeliveryTime[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width; + } else { + CursorRequestDeliveryTime[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width; + } + if (VRatioPrefetchY[k] <= 1) { + CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width; + } else { + CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width; + } + } else { + CursorRequestDeliveryTime[k] = 0; + CursorRequestDeliveryTimePrefetch[k] = 0; + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%d : NumberOfCursors = %d\n", __func__, k, NumberOfCursors[k]); + dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n", __func__, k, CursorRequestDeliveryTime[k]); + dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n", __func__, k, CursorRequestDeliveryTimePrefetch[k]); +#endif + } +} + +static void CalculateMetaAndPTETimes( + int NumberOfActivePlanes, + bool GPUVMEnable, + int MetaChunkSize, + int MinMetaChunkSizeBytes, + int HTotal[], + double VRatio[], + double VRatioChroma[], + double DestinationLinesToRequestRowInVBlank[], + double DestinationLinesToRequestRowInImmediateFlip[], + bool DCCEnable[], + double PixelClock[], + int BytePerPixelY[], + int BytePerPixelC[], + enum scan_direction_class SourceScan[], + int dpte_row_height[], + int dpte_row_height_chroma[], + int meta_row_width[], + int meta_row_width_chroma[], + int meta_row_height[], + int meta_row_height_chroma[], + int meta_req_width[], + int meta_req_width_chroma[], + int meta_req_height[], + int meta_req_height_chroma[], + int dpte_group_bytes[], + int PTERequestSizeY[], + int PTERequestSizeC[], + int PixelPTEReqWidthY[], + int PixelPTEReqHeightY[], + int PixelPTEReqWidthC[], + int PixelPTEReqHeightC[], + int dpte_row_width_luma_ub[], + int dpte_row_width_chroma_ub[], + double DST_Y_PER_PTE_ROW_NOM_L[], + double DST_Y_PER_PTE_ROW_NOM_C[], + double DST_Y_PER_META_ROW_NOM_L[], + double DST_Y_PER_META_ROW_NOM_C[], + double TimePerMetaChunkNominal[], + double TimePerChromaMetaChunkNominal[], + double TimePerMetaChunkVBlank[], + double TimePerChromaMetaChunkVBlank[], + double TimePerMetaChunkFlip[], + double TimePerChromaMetaChunkFlip[], + double time_per_pte_group_nom_luma[], + double time_per_pte_group_vblank_luma[], + double time_per_pte_group_flip_luma[], + double time_per_pte_group_nom_chroma[], + double time_per_pte_group_vblank_chroma[], + double time_per_pte_group_flip_chroma[]) +{ + unsigned int meta_chunk_width; + unsigned int min_meta_chunk_width; + unsigned int meta_chunk_per_row_int; + unsigned int meta_row_remainder; + unsigned int meta_chunk_threshold; + unsigned int meta_chunks_per_row_ub; + unsigned int meta_chunk_width_chroma; + unsigned int min_meta_chunk_width_chroma; + unsigned int meta_chunk_per_row_int_chroma; + unsigned int meta_row_remainder_chroma; + unsigned int meta_chunk_threshold_chroma; + unsigned int meta_chunks_per_row_ub_chroma; + unsigned int dpte_group_width_luma; + unsigned int dpte_groups_per_row_luma_ub; + unsigned int dpte_group_width_chroma; + unsigned int dpte_groups_per_row_chroma_ub; + int k; + + for (k = 0; k < NumberOfActivePlanes; ++k) { + DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k]; + if (BytePerPixelC[k] == 0) { + DST_Y_PER_PTE_ROW_NOM_C[k] = 0; + } else { + DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k]; + } + DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k]; + if (BytePerPixelC[k] == 0) { + DST_Y_PER_META_ROW_NOM_C[k] = 0; + } else { + DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k]; + } + } + + for (k = 0; k < NumberOfActivePlanes; ++k) { + if (DCCEnable[k] == true) { + meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k]; + min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k]; + meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width; + meta_row_remainder = meta_row_width[k] % meta_chunk_width; + if (SourceScan[k] != dm_vert) { + meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k]; + } else { + meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k]; + } + if (meta_row_remainder <= meta_chunk_threshold) { + meta_chunks_per_row_ub = meta_chunk_per_row_int + 1; + } else { + meta_chunks_per_row_ub = meta_chunk_per_row_int + 2; + } + TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; + TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; + TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; + if (BytePerPixelC[k] == 0) { + TimePerChromaMetaChunkNominal[k] = 0; + TimePerChromaMetaChunkVBlank[k] = 0; + TimePerChromaMetaChunkFlip[k] = 0; + } else { + meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / meta_row_height_chroma[k]; + min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / meta_row_height_chroma[k]; + meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] / meta_chunk_width_chroma; + meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma; + if (SourceScan[k] != dm_vert) { + meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_width_chroma[k]; + } else { + meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_height_chroma[k]; + } + if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) { + meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1; + } else { + meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2; + } + TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma; + TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma; + TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma; + } + } else { + TimePerMetaChunkNominal[k] = 0; + TimePerMetaChunkVBlank[k] = 0; + TimePerMetaChunkFlip[k] = 0; + TimePerChromaMetaChunkNominal[k] = 0; + TimePerChromaMetaChunkVBlank[k] = 0; + TimePerChromaMetaChunkFlip[k] = 0; + } + } + + for (k = 0; k < NumberOfActivePlanes; ++k) { + if (GPUVMEnable == true) { + if (SourceScan[k] != dm_vert) { + dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqWidthY[k]; + } else { + dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqHeightY[k]; + } + dpte_groups_per_row_luma_ub = dml_ceil(1.0 * dpte_row_width_luma_ub[k] / dpte_group_width_luma, 1); + time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub; + time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub; + time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub; + if (BytePerPixelC[k] == 0) { + time_per_pte_group_nom_chroma[k] = 0; + time_per_pte_group_vblank_chroma[k] = 0; + time_per_pte_group_flip_chroma[k] = 0; + } else { + if (SourceScan[k] != dm_vert) { + dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqWidthC[k]; + } else { + dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqHeightC[k]; + } + dpte_groups_per_row_chroma_ub = dml_ceil(1.0 * dpte_row_width_chroma_ub[k] / dpte_group_width_chroma, 1); + time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub; + time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub; + time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub; + } + } else { + time_per_pte_group_nom_luma[k] = 0; + time_per_pte_group_vblank_luma[k] = 0; + time_per_pte_group_flip_luma[k] = 0; + time_per_pte_group_nom_chroma[k] = 0; + time_per_pte_group_vblank_chroma[k] = 0; + time_per_pte_group_flip_chroma[k] = 0; + } + } +} + +static void CalculateVMGroupAndRequestTimes( + unsigned int NumberOfActivePlanes, + bool GPUVMEnable, + unsigned int GPUVMMaxPageTableLevels, + unsigned int HTotal[], + int BytePerPixelC[], + double DestinationLinesToRequestVMInVBlank[], + double DestinationLinesToRequestVMInImmediateFlip[], + bool DCCEnable[], + double PixelClock[], + int dpte_row_width_luma_ub[], + int dpte_row_width_chroma_ub[], + int vm_group_bytes[], + unsigned int dpde0_bytes_per_frame_ub_l[], + unsigned int dpde0_bytes_per_frame_ub_c[], + int meta_pte_bytes_per_frame_ub_l[], + int meta_pte_bytes_per_frame_ub_c[], + double TimePerVMGroupVBlank[], + double TimePerVMGroupFlip[], + double TimePerVMRequestVBlank[], + double TimePerVMRequestFlip[]) +{ + int num_group_per_lower_vm_stage; + int num_req_per_lower_vm_stage; + int k; + + for (k = 0; k < NumberOfActivePlanes; ++k) { + if (GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1)) { + if (DCCEnable[k] == false) { + if (BytePerPixelC[k] > 0) { + num_group_per_lower_vm_stage = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) + + dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1); + } else { + num_group_per_lower_vm_stage = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1); + } + } else { + if (GPUVMMaxPageTableLevels == 1) { + if (BytePerPixelC[k] > 0) { + num_group_per_lower_vm_stage = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) + + dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1); + } else { + num_group_per_lower_vm_stage = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1); + } + } else { + if (BytePerPixelC[k] > 0) { + num_group_per_lower_vm_stage = 2 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) + + dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1) + + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) + + dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1); + } else { + num_group_per_lower_vm_stage = 1 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) + + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1); + } + } + } + + if (DCCEnable[k] == false) { + if (BytePerPixelC[k] > 0) { + num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64; + } else { + num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64; + } + } else { + if (GPUVMMaxPageTableLevels == 1) { + if (BytePerPixelC[k] > 0) { + num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64; + } else { + num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64; + } + } else { + if (BytePerPixelC[k] > 0) { + num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64 + + meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64; + } else { + num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_l[k] / 64; + } + } + } + + TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage; + TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage; + TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage; + TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage; + + if (GPUVMMaxPageTableLevels > 2) { + TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2; + TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2; + TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2; + TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2; + } + + } else { + TimePerVMGroupVBlank[k] = 0; + TimePerVMGroupFlip[k] = 0; + TimePerVMRequestVBlank[k] = 0; + TimePerVMRequestFlip[k] = 0; + } + } +} + +static void CalculateStutterEfficiency( + struct display_mode_lib *mode_lib, + int CompressedBufferSizeInkByte, + bool UnboundedRequestEnabled, + int ConfigReturnBufferSizeInKByte, + int MetaFIFOSizeInKEntries, + int ZeroSizeBufferEntries, + int NumberOfActivePlanes, + int ROBBufferSizeInKByte, + double TotalDataReadBandwidth, + double DCFCLK, + double ReturnBW, + double COMPBUF_RESERVED_SPACE_64B, + double COMPBUF_RESERVED_SPACE_ZS, + double SRExitTime, + double SRExitZ8Time, + bool SynchronizedVBlank, + double Z8StutterEnterPlusExitWatermark, + double StutterEnterPlusExitWatermark, + bool ProgressiveToInterlaceUnitInOPP, + bool Interlace[], + double MinTTUVBlank[], + int DPPPerPlane[], + unsigned int DETBufferSizeY[], + int BytePerPixelY[], + double BytePerPixelDETY[], + double SwathWidthY[], + int SwathHeightY[], + int SwathHeightC[], + double NetDCCRateLuma[], + double NetDCCRateChroma[], + double DCCFractionOfZeroSizeRequestsLuma[], + double DCCFractionOfZeroSizeRequestsChroma[], + int HTotal[], + int VTotal[], + double PixelClock[], + double VRatio[], + enum scan_direction_class SourceScan[], + int BlockHeight256BytesY[], + int BlockWidth256BytesY[], + int BlockHeight256BytesC[], + int BlockWidth256BytesC[], + int DCCYMaxUncompressedBlock[], + int DCCCMaxUncompressedBlock[], + int VActive[], + bool DCCEnable[], + bool WritebackEnable[], + double ReadBandwidthPlaneLuma[], + double ReadBandwidthPlaneChroma[], + double meta_row_bw[], + double dpte_row_bw[], + double *StutterEfficiencyNotIncludingVBlank, + double *StutterEfficiency, + int *NumberOfStutterBurstsPerFrame, + double *Z8StutterEfficiencyNotIncludingVBlank, + double *Z8StutterEfficiency, + int *Z8NumberOfStutterBurstsPerFrame, + double *StutterPeriod) +{ + struct vba_vars_st *v = &mode_lib->vba; + + double DETBufferingTimeY; + double SwathWidthYCriticalPlane = 0; + double VActiveTimeCriticalPlane = 0; + double FrameTimeCriticalPlane = 0; + int BytePerPixelYCriticalPlane = 0; + double LinesToFinishSwathTransferStutterCriticalPlane = 0; + double MinTTUVBlankCriticalPlane = 0; + double TotalCompressedReadBandwidth; + double TotalRowReadBandwidth; + double AverageDCCCompressionRate; + double EffectiveCompressedBufferSize; + double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer; + double StutterBurstTime; + int TotalActiveWriteback; + double LinesInDETY; + double LinesInDETYRoundedDownToSwath; + double MaximumEffectiveCompressionLuma; + double MaximumEffectiveCompressionChroma; + double TotalZeroSizeRequestReadBandwidth; + double TotalZeroSizeCompressedReadBandwidth; + double AverageDCCZeroSizeFraction; + double AverageZeroSizeCompressionRate; + int TotalNumberOfActiveOTG = 0; + double LastStutterPeriod = 0.0; + double LastZ8StutterPeriod = 0.0; + int k; + + TotalZeroSizeRequestReadBandwidth = 0; + TotalZeroSizeCompressedReadBandwidth = 0; + TotalRowReadBandwidth = 0; + TotalCompressedReadBandwidth = 0; + + for (k = 0; k < NumberOfActivePlanes; ++k) { + if (DCCEnable[k] == true) { + if ((SourceScan[k] == dm_vert && BlockWidth256BytesY[k] > SwathHeightY[k]) || (SourceScan[k] != dm_vert && BlockHeight256BytesY[k] > SwathHeightY[k]) + || DCCYMaxUncompressedBlock[k] < 256) { + MaximumEffectiveCompressionLuma = 2; + } else { + MaximumEffectiveCompressionLuma = 4; + } + TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] / dml_min(NetDCCRateLuma[k], MaximumEffectiveCompressionLuma); + TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k]; + TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth + + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k] / MaximumEffectiveCompressionLuma; + if (ReadBandwidthPlaneChroma[k] > 0) { + if ((SourceScan[k] == dm_vert && BlockWidth256BytesC[k] > SwathHeightC[k]) + || (SourceScan[k] != dm_vert && BlockHeight256BytesC[k] > SwathHeightC[k]) || DCCCMaxUncompressedBlock[k] < 256) { + MaximumEffectiveCompressionChroma = 2; + } else { + MaximumEffectiveCompressionChroma = 4; + } + TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + + ReadBandwidthPlaneChroma[k] / dml_min(NetDCCRateChroma[k], MaximumEffectiveCompressionChroma); + TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k]; + TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth + + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k] / MaximumEffectiveCompressionChroma; + } + } else { + TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] + ReadBandwidthPlaneChroma[k]; + } + TotalRowReadBandwidth = TotalRowReadBandwidth + DPPPerPlane[k] * (meta_row_bw[k] + dpte_row_bw[k]); + } + + AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth; + AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth); + dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth); + dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, TotalZeroSizeCompressedReadBandwidth); + dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma); + dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma); + dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate); + dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction); + dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte); +#endif + + if (AverageDCCZeroSizeFraction == 1) { + AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth; + EffectiveCompressedBufferSize = MetaFIFOSizeInKEntries * 1024 * 64 * AverageZeroSizeCompressionRate + (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 * AverageZeroSizeCompressionRate; + } else if (AverageDCCZeroSizeFraction > 0) { + AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth; + EffectiveCompressedBufferSize = dml_min( + CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate, + MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate)) + + dml_min((ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate, + (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate)); + dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate); + dml_print( + "DML::%s: min 2 = %f\n", + __func__, + MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate)); + dml_print("DML::%s: min 3 = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate); + dml_print("DML::%s: min 4 = %f\n", __func__, ZeroSizeBufferEntries * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate)); + } else { + EffectiveCompressedBufferSize = dml_min( + CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate, + MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate) + (ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate; + dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate); + dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate); + } + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries); + dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate); + dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize); +#endif + + *StutterPeriod = 0; + for (k = 0; k < NumberOfActivePlanes; ++k) { + LinesInDETY = (DETBufferSizeY[k] + (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0) * ReadBandwidthPlaneLuma[k] / TotalDataReadBandwidth) + / BytePerPixelDETY[k] / SwathWidthY[k]; + LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]); + DETBufferingTimeY = LinesInDETYRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatio[k]; +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%0d DETBufferSizeY = %f\n", __func__, k, DETBufferSizeY[k]); + dml_print("DML::%s: k=%0d BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]); + dml_print("DML::%s: k=%0d SwathWidthY = %f\n", __func__, k, SwathWidthY[k]); + dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma = %f\n", __func__, k, ReadBandwidthPlaneLuma[k]); + dml_print("DML::%s: k=%0d TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth); + dml_print("DML::%s: k=%0d LinesInDETY = %f\n", __func__, k, LinesInDETY); + dml_print("DML::%s: k=%0d LinesInDETYRoundedDownToSwath = %f\n", __func__, k, LinesInDETYRoundedDownToSwath); + dml_print("DML::%s: k=%0d HTotal = %d\n", __func__, k, HTotal[k]); + dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]); + dml_print("DML::%s: k=%0d VRatio = %f\n", __func__, k, VRatio[k]); + dml_print("DML::%s: k=%0d DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY); + dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]); +#endif + + if (k == 0 || DETBufferingTimeY < *StutterPeriod) { + bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP; + + *StutterPeriod = DETBufferingTimeY; + FrameTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VTotal[k] / 2.0, 1.0) : VTotal[k]) * HTotal[k] / PixelClock[k]; + VActiveTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VActive[k] / 2.0, 1.0) : VActive[k]) * HTotal[k] / PixelClock[k]; + BytePerPixelYCriticalPlane = BytePerPixelY[k]; + SwathWidthYCriticalPlane = SwathWidthY[k]; + LinesToFinishSwathTransferStutterCriticalPlane = SwathHeightY[k] - (LinesInDETY - LinesInDETYRoundedDownToSwath); + MinTTUVBlankCriticalPlane = MinTTUVBlank[k]; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod); + dml_print("DML::%s: MinTTUVBlankCriticalPlane = %f\n", __func__, MinTTUVBlankCriticalPlane); + dml_print("DML::%s: FrameTimeCriticalPlane = %f\n", __func__, FrameTimeCriticalPlane); + dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane); + dml_print("DML::%s: BytePerPixelYCriticalPlane = %d\n", __func__, BytePerPixelYCriticalPlane); + dml_print("DML::%s: SwathWidthYCriticalPlane = %f\n", __func__, SwathWidthYCriticalPlane); + dml_print("DML::%s: LinesToFinishSwathTransferStutterCriticalPlane = %f\n", __func__, LinesToFinishSwathTransferStutterCriticalPlane); +#endif + } + } + + PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth, EffectiveCompressedBufferSize); +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte); + dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate); + dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, *StutterPeriod * TotalDataReadBandwidth); + dml_print("DML::%s: ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize); + dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize); + dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer); + dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW); + dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth); + dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth); + dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK); +#endif + + StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW + + (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64) + + *StutterPeriod * TotalRowReadBandwidth / ReturnBW; +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW); + dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth)); + dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)); + dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW); + dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime); +#endif + StutterBurstTime = dml_max(StutterBurstTime, LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW); + + dml_print( + "DML::%s: Time to finish residue swath=%f\n", + __func__, + LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW); + + TotalActiveWriteback = 0; + for (k = 0; k < NumberOfActivePlanes; ++k) { + if (WritebackEnable[k]) { + TotalActiveWriteback = TotalActiveWriteback + 1; + } + } + + if (TotalActiveWriteback == 0) { +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime); + dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time); + dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime); + dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod); +#endif + *StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100; + *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100; + *NumberOfStutterBurstsPerFrame = (*StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0); + *Z8NumberOfStutterBurstsPerFrame = (*Z8StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0); + } else { + *StutterEfficiencyNotIncludingVBlank = 0.; + *Z8StutterEfficiencyNotIncludingVBlank = 0.; + *NumberOfStutterBurstsPerFrame = 0; + *Z8NumberOfStutterBurstsPerFrame = 0; + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane); + dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank); + dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *Z8StutterEfficiencyNotIncludingVBlank); + dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame); + dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame); +#endif + + for (k = 0; k < NumberOfActivePlanes; ++k) { + if (v->BlendingAndTiming[k] == k) { + TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1; + } + } + + if (*StutterEfficiencyNotIncludingVBlank > 0) { + LastStutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod; + + if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastStutterPeriod + MinTTUVBlankCriticalPlane > StutterEnterPlusExitWatermark) { + *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime + StutterBurstTime * VActiveTimeCriticalPlane + / *StutterPeriod) / FrameTimeCriticalPlane) * 100; + } else { + *StutterEfficiency = *StutterEfficiencyNotIncludingVBlank; + } + } else { + *StutterEfficiency = 0; + } + + if (*Z8StutterEfficiencyNotIncludingVBlank > 0) { + LastZ8StutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod; + if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastZ8StutterPeriod + MinTTUVBlankCriticalPlane > Z8StutterEnterPlusExitWatermark) { + *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime * VActiveTimeCriticalPlane + / *StutterPeriod) / FrameTimeCriticalPlane) * 100; + } else { + *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank; + } + } else { + *Z8StutterEfficiency = 0.; + } + + dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod); + dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark); + dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime); + dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod); + dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency); + dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency); + dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank); + dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame); +} + +static void CalculateSwathAndDETConfiguration( + bool ForceSingleDPP, + int NumberOfActivePlanes, + bool DETSharedByAllDPP, + unsigned int DETBufferSizeInKByteA[], + double MaximumSwathWidthLuma[], + double MaximumSwathWidthChroma[], + enum scan_direction_class SourceScan[], + enum source_format_class SourcePixelFormat[], + enum dm_swizzle_mode SurfaceTiling[], + int ViewportWidth[], + int ViewportHeight[], + int SurfaceWidthY[], + int SurfaceWidthC[], + int SurfaceHeightY[], + int SurfaceHeightC[], + int Read256BytesBlockHeightY[], + int Read256BytesBlockHeightC[], + int Read256BytesBlockWidthY[], + int Read256BytesBlockWidthC[], + enum odm_combine_mode ODMCombineEnabled[], + int BlendingAndTiming[], + int BytePerPixY[], + int BytePerPixC[], + double BytePerPixDETY[], + double BytePerPixDETC[], + int HActive[], + double HRatio[], + double HRatioChroma[], + int DPPPerPlane[], + int swath_width_luma_ub[], + int swath_width_chroma_ub[], + double SwathWidth[], + double SwathWidthChroma[], + int SwathHeightY[], + int SwathHeightC[], + unsigned int DETBufferSizeY[], + unsigned int DETBufferSizeC[], + bool ViewportSizeSupportPerPlane[], + bool *ViewportSizeSupport) +{ + int MaximumSwathHeightY[DC__NUM_DPP__MAX]; + int MaximumSwathHeightC[DC__NUM_DPP__MAX]; + int MinimumSwathHeightY; + int MinimumSwathHeightC; + int RoundedUpMaxSwathSizeBytesY; + int RoundedUpMaxSwathSizeBytesC; + int RoundedUpMinSwathSizeBytesY; + int RoundedUpMinSwathSizeBytesC; + int RoundedUpSwathSizeBytesY; + int RoundedUpSwathSizeBytesC; + double SwathWidthSingleDPP[DC__NUM_DPP__MAX]; + double SwathWidthSingleDPPChroma[DC__NUM_DPP__MAX]; + int k; + + CalculateSwathWidth( + ForceSingleDPP, + NumberOfActivePlanes, + SourcePixelFormat, + SourceScan, + ViewportWidth, + ViewportHeight, + SurfaceWidthY, + SurfaceWidthC, + SurfaceHeightY, + SurfaceHeightC, + ODMCombineEnabled, + BytePerPixY, + BytePerPixC, + Read256BytesBlockHeightY, + Read256BytesBlockHeightC, + Read256BytesBlockWidthY, + Read256BytesBlockWidthC, + BlendingAndTiming, + HActive, + HRatio, + DPPPerPlane, + SwathWidthSingleDPP, + SwathWidthSingleDPPChroma, + SwathWidth, + SwathWidthChroma, + MaximumSwathHeightY, + MaximumSwathHeightC, + swath_width_luma_ub, + swath_width_chroma_ub); + + *ViewportSizeSupport = true; + for (k = 0; k < NumberOfActivePlanes; ++k) { + unsigned int DETBufferSizeInKByte = DETBufferSizeInKByteA[k]; + + if (DETSharedByAllDPP && DPPPerPlane[k]) + DETBufferSizeInKByte /= DPPPerPlane[k]; + if ((SourcePixelFormat[k] == dm_444_64 || SourcePixelFormat[k] == dm_444_32 || SourcePixelFormat[k] == dm_444_16 || SourcePixelFormat[k] == dm_mono_16 + || SourcePixelFormat[k] == dm_mono_8 || SourcePixelFormat[k] == dm_rgbe)) { + if (SurfaceTiling[k] == dm_sw_linear + || (SourcePixelFormat[k] == dm_444_64 + && (SurfaceTiling[k] == dm_sw_64kb_s || SurfaceTiling[k] == dm_sw_64kb_s_t || SurfaceTiling[k] == dm_sw_64kb_s_x) + && SourceScan[k] != dm_vert)) { + MinimumSwathHeightY = MaximumSwathHeightY[k]; + } else if (SourcePixelFormat[k] == dm_444_8 && SourceScan[k] == dm_vert) { + MinimumSwathHeightY = MaximumSwathHeightY[k]; + } else { + MinimumSwathHeightY = MaximumSwathHeightY[k] / 2; + } + MinimumSwathHeightC = MaximumSwathHeightC[k]; + } else { + if (SurfaceTiling[k] == dm_sw_linear) { + MinimumSwathHeightY = MaximumSwathHeightY[k]; + MinimumSwathHeightC = MaximumSwathHeightC[k]; + } else if (SourcePixelFormat[k] == dm_rgbe_alpha && SourceScan[k] == dm_vert) { + MinimumSwathHeightY = MaximumSwathHeightY[k] / 2; + MinimumSwathHeightC = MaximumSwathHeightC[k]; + } else if (SourcePixelFormat[k] == dm_rgbe_alpha) { + MinimumSwathHeightY = MaximumSwathHeightY[k] / 2; + MinimumSwathHeightC = MaximumSwathHeightC[k] / 2; + } else if (SourcePixelFormat[k] == dm_420_8 && SourceScan[k] == dm_vert) { + MinimumSwathHeightY = MaximumSwathHeightY[k]; + MinimumSwathHeightC = MaximumSwathHeightC[k] / 2; + } else { + MinimumSwathHeightC = MaximumSwathHeightC[k] / 2; + MinimumSwathHeightY = MaximumSwathHeightY[k] / 2; + } + } + + RoundedUpMaxSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k]; + RoundedUpMinSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MinimumSwathHeightY; + if (SourcePixelFormat[k] == dm_420_10) { + RoundedUpMaxSwathSizeBytesY = dml_ceil((double) RoundedUpMaxSwathSizeBytesY, 256); + RoundedUpMinSwathSizeBytesY = dml_ceil((double) RoundedUpMinSwathSizeBytesY, 256); + } + RoundedUpMaxSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k]; + RoundedUpMinSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MinimumSwathHeightC; + if (SourcePixelFormat[k] == dm_420_10) { + RoundedUpMaxSwathSizeBytesC = dml_ceil(RoundedUpMaxSwathSizeBytesC, 256); + RoundedUpMinSwathSizeBytesC = dml_ceil(RoundedUpMinSwathSizeBytesC, 256); + } + + if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) { + SwathHeightY[k] = MaximumSwathHeightY[k]; + SwathHeightC[k] = MaximumSwathHeightC[k]; + RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY; + RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC; + } else if (RoundedUpMaxSwathSizeBytesY >= 1.5 * RoundedUpMaxSwathSizeBytesC + && RoundedUpMinSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) { + SwathHeightY[k] = MinimumSwathHeightY; + SwathHeightC[k] = MaximumSwathHeightC[k]; + RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY; + RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC; + } else if (RoundedUpMaxSwathSizeBytesY < 1.5 * RoundedUpMaxSwathSizeBytesC + && RoundedUpMaxSwathSizeBytesY + RoundedUpMinSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) { + SwathHeightY[k] = MaximumSwathHeightY[k]; + SwathHeightC[k] = MinimumSwathHeightC; + RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY; + RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC; + } else { + SwathHeightY[k] = MinimumSwathHeightY; + SwathHeightC[k] = MinimumSwathHeightC; + RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY; + RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC; + } + { + double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64); + if (SwathHeightC[k] == 0) { + DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024; + DETBufferSizeC[k] = 0; + } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) { + DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024 / 2; + DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 2; + } else { + DETBufferSizeY[k] = dml_floor(actDETBufferSizeInKByte * 1024 * 2 / 3, 1024); + DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 3; + } + + if (RoundedUpMinSwathSizeBytesY + RoundedUpMinSwathSizeBytesC > actDETBufferSizeInKByte * 1024 / 2 || SwathWidth[k] > MaximumSwathWidthLuma[k] + || (SwathHeightC[k] > 0 && SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) { + *ViewportSizeSupport = false; + ViewportSizeSupportPerPlane[k] = false; + } else { + ViewportSizeSupportPerPlane[k] = true; + } + } + } +} + +static void CalculateSwathWidth( + bool ForceSingleDPP, + int NumberOfActivePlanes, + enum source_format_class SourcePixelFormat[], + enum scan_direction_class SourceScan[], + int ViewportWidth[], + int ViewportHeight[], + int SurfaceWidthY[], + int SurfaceWidthC[], + int SurfaceHeightY[], + int SurfaceHeightC[], + enum odm_combine_mode ODMCombineEnabled[], + int BytePerPixY[], + int BytePerPixC[], + int Read256BytesBlockHeightY[], + int Read256BytesBlockHeightC[], + int Read256BytesBlockWidthY[], + int Read256BytesBlockWidthC[], + int BlendingAndTiming[], + int HActive[], + double HRatio[], + int DPPPerPlane[], + double SwathWidthSingleDPPY[], + double SwathWidthSingleDPPC[], + double SwathWidthY[], + double SwathWidthC[], + int MaximumSwathHeightY[], + int MaximumSwathHeightC[], + int swath_width_luma_ub[], + int swath_width_chroma_ub[]) +{ + enum odm_combine_mode MainPlaneODMCombine; + int j, k; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: NumberOfActivePlanes = %d\n", __func__, NumberOfActivePlanes); +#endif + + for (k = 0; k < NumberOfActivePlanes; ++k) { + if (SourceScan[k] != dm_vert) { + SwathWidthSingleDPPY[k] = ViewportWidth[k]; + } else { + SwathWidthSingleDPPY[k] = ViewportHeight[k]; + } + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]); + dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]); +#endif + + MainPlaneODMCombine = ODMCombineEnabled[k]; + for (j = 0; j < NumberOfActivePlanes; ++j) { + if (BlendingAndTiming[k] == j) { + MainPlaneODMCombine = ODMCombineEnabled[j]; + } + } + + if (MainPlaneODMCombine == dm_odm_combine_mode_4to1) { + SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 4.0 * HRatio[k])); + } else if (MainPlaneODMCombine == dm_odm_combine_mode_2to1) { + SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 2.0 * HRatio[k])); + } else if (DPPPerPlane[k] == 2) { + SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2; + } else { + SwathWidthY[k] = SwathWidthSingleDPPY[k]; + } + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%d SwathWidthSingleDPPY=%f\n", __func__, k, SwathWidthSingleDPPY[k]); + dml_print("DML::%s: k=%d SwathWidthY=%f\n", __func__, k, SwathWidthY[k]); +#endif + + if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || SourcePixelFormat[k] == dm_420_12) { + SwathWidthC[k] = SwathWidthY[k] / 2; + SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2; + } else { + SwathWidthC[k] = SwathWidthY[k]; + SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k]; + } + + if (ForceSingleDPP == true) { + SwathWidthY[k] = SwathWidthSingleDPPY[k]; + SwathWidthC[k] = SwathWidthSingleDPPC[k]; + } + { + int surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]); + int surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]); + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l); +#endif + + if (SourceScan[k] != dm_vert) { + MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k]; + MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k]; + swath_width_luma_ub[k] = dml_min(surface_width_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]); + if (BytePerPixC[k] > 0) { + int surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]); + + swath_width_chroma_ub[k] = dml_min( + surface_width_ub_c, + (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]); + } else { + swath_width_chroma_ub[k] = 0; + } + } else { + MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k]; + MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k]; + swath_width_luma_ub[k] = dml_min(surface_height_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]); + if (BytePerPixC[k] > 0) { + int surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]); + + swath_width_chroma_ub[k] = dml_min( + surface_height_ub_c, + (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]); + } else { + swath_width_chroma_ub[k] = 0; + } + } + } + } +} + +static double CalculateExtraLatency( + int RoundTripPingLatencyCycles, + int ReorderingBytes, + double DCFCLK, + int TotalNumberOfActiveDPP, + int PixelChunkSizeInKByte, + int TotalNumberOfDCCActiveDPP, + int MetaChunkSize, + double ReturnBW, + bool GPUVMEnable, + bool HostVMEnable, + int NumberOfActivePlanes, + int NumberOfDPP[], + int dpte_group_bytes[], + double HostVMInefficiencyFactor, + double HostVMMinPageSize, + int HostVMMaxNonCachedPageTableLevels) +{ + double ExtraLatencyBytes; + double ExtraLatency; + + ExtraLatencyBytes = CalculateExtraLatencyBytes( + ReorderingBytes, + TotalNumberOfActiveDPP, + PixelChunkSizeInKByte, + TotalNumberOfDCCActiveDPP, + MetaChunkSize, + GPUVMEnable, + HostVMEnable, + NumberOfActivePlanes, + NumberOfDPP, + dpte_group_bytes, + HostVMInefficiencyFactor, + HostVMMinPageSize, + HostVMMaxNonCachedPageTableLevels); + + ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles); + dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK); + dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes); + dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW); + dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency); +#endif + + return ExtraLatency; +} + +static double CalculateExtraLatencyBytes( + int ReorderingBytes, + int TotalNumberOfActiveDPP, + int PixelChunkSizeInKByte, + int TotalNumberOfDCCActiveDPP, + int MetaChunkSize, + bool GPUVMEnable, + bool HostVMEnable, + int NumberOfActivePlanes, + int NumberOfDPP[], + int dpte_group_bytes[], + double HostVMInefficiencyFactor, + double HostVMMinPageSize, + int HostVMMaxNonCachedPageTableLevels) +{ + double ret; + int HostVMDynamicLevels = 0, k; + + if (GPUVMEnable == true && HostVMEnable == true) { + if (HostVMMinPageSize < 2048) { + HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels; + } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) { + HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1); + } else { + HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2); + } + } else { + HostVMDynamicLevels = 0; + } + + ret = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0; + + if (GPUVMEnable == true) { + for (k = 0; k < NumberOfActivePlanes; ++k) { + ret = ret + NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor; + } + } + return ret; +} + +static double CalculateUrgentLatency( + double UrgentLatencyPixelDataOnly, + double UrgentLatencyPixelMixedWithVMData, + double UrgentLatencyVMDataOnly, + bool DoUrgentLatencyAdjustment, + double UrgentLatencyAdjustmentFabricClockComponent, + double UrgentLatencyAdjustmentFabricClockReference, + double FabricClock) +{ + double ret; + + ret = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly); + if (DoUrgentLatencyAdjustment == true) { + ret = ret + UrgentLatencyAdjustmentFabricClockComponent * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1); + } + return ret; +} + +static void UseMinimumDCFCLK( + struct display_mode_lib *mode_lib, + int MaxPrefetchMode, + int ReorderingBytes) +{ + struct vba_vars_st *v = &mode_lib->vba; + int dummy1, i, j, k; + double NormalEfficiency, dummy2, dummy3; + double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2]; + + NormalEfficiency = v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0; + for (i = 0; i < v->soc.num_states; ++i) { + for (j = 0; j <= 1; ++j) { + double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX]; + double PrefetchPixelLinesTime[DC__NUM_DPP__MAX]; + double DCFCLKRequiredForPeakBandwidthPerPlane[DC__NUM_DPP__MAX]; + double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX]; + double MinimumTWait; + double NonDPTEBandwidth; + double DPTEBandwidth; + double DCFCLKRequiredForAverageBandwidth; + double ExtraLatencyBytes; + double ExtraLatencyCycles; + double DCFCLKRequiredForPeakBandwidth; + int NoOfDPPState[DC__NUM_DPP__MAX]; + double MinimumTvmPlus2Tr0; + + TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0; + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j] + + v->NoOfDPP[i][j][k] * v->DPTEBytesPerRow[i][j][k] / (15.75 * v->HTotal[k] / v->PixelClock[k]); + } + + for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) { + NoOfDPPState[k] = v->NoOfDPP[i][j][k]; + } + + MinimumTWait = CalculateTWait(MaxPrefetchMode, v->FinalDRAMClockChangeLatency, v->UrgLatency[i], v->SREnterPlusExitTime); + NonDPTEBandwidth = v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j]; + DPTEBandwidth = (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) ? + TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : v->TotalDPTERowBandwidth[i][j]; + DCFCLKRequiredForAverageBandwidth = dml_max3( + v->ProjectedDCFCLKDeepSleep[i][j], + (NonDPTEBandwidth + v->TotalDPTERowBandwidth[i][j]) / v->ReturnBusWidth + / (v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100), + (NonDPTEBandwidth + DPTEBandwidth / NormalEfficiency) / NormalEfficiency / v->ReturnBusWidth); + + ExtraLatencyBytes = CalculateExtraLatencyBytes( + ReorderingBytes, + v->TotalNumberOfActiveDPP[i][j], + v->PixelChunkSizeInKByte, + v->TotalNumberOfDCCActiveDPP[i][j], + v->MetaChunkSize, + v->GPUVMEnable, + v->HostVMEnable, + v->NumberOfActivePlanes, + NoOfDPPState, + v->dpte_group_bytes, + 1, + v->HostVMMinPageSize, + v->HostVMMaxNonCachedPageTableLevels); + ExtraLatencyCycles = v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ + ExtraLatencyBytes / NormalEfficiency / v->ReturnBusWidth; + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + double DCFCLKCyclesRequiredInPrefetch; + double ExpectedPrefetchBWAcceleration; + double PrefetchTime; + + PixelDCFCLKCyclesRequiredInPrefetch[k] = (v->PrefetchLinesY[i][j][k] * v->swath_width_luma_ub_all_states[i][j][k] * v->BytePerPixelY[k] + + v->PrefetchLinesC[i][j][k] * v->swath_width_chroma_ub_all_states[i][j][k] * v->BytePerPixelC[k]) / NormalEfficiency / v->ReturnBusWidth; + DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k] + + v->PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth * (v->GPUVMMaxPageTableLevels > 2 ? 1 : 0) + + 2 * v->DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth + + 2 * v->MetaRowBytes[i][j][k] / NormalEfficiency / v->ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k]; + PrefetchPixelLinesTime[k] = dml_max(v->PrefetchLinesY[i][j][k], v->PrefetchLinesC[i][j][k]) * v->HTotal[k] / v->PixelClock[k]; + ExpectedPrefetchBWAcceleration = (v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k]) + / (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]); + DynamicMetadataVMExtraLatency[k] = + (v->GPUVMEnable == true && v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true) ? + v->UrgLatency[i] * v->GPUVMMaxPageTableLevels * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0; + PrefetchTime = (v->MaximumVStartup[i][j][k] - 1) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait + - v->UrgLatency[i] + * ((v->GPUVMMaxPageTableLevels <= 2 ? v->GPUVMMaxPageTableLevels : v->GPUVMMaxPageTableLevels - 2) + * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) + - DynamicMetadataVMExtraLatency[k]; + + if (PrefetchTime > 0) { + double ExpectedVRatioPrefetch; + ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k] + / (PrefetchTime * PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch); + DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * PixelDCFCLKCyclesRequiredInPrefetch[k] / PrefetchPixelLinesTime[k] + * dml_max(1.0, ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4) * ExpectedPrefetchBWAcceleration; + if (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) { + DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKRequiredForPeakBandwidthPerPlane[k] + + NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth; + } + } else { + DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i]; + } + if (v->DynamicMetadataEnable[k] == true) { + double TSetupPipe; + double TdmbfPipe; + double TdmsksPipe; + double TdmecPipe; + double AllowedTimeForUrgentExtraLatency; + + CalculateVupdateAndDynamicMetadataParameters( + v->MaxInterDCNTileRepeaters, + v->RequiredDPPCLK[i][j][k], + v->RequiredDISPCLK[i][j], + v->ProjectedDCFCLKDeepSleep[i][j], + v->PixelClock[k], + v->HTotal[k], + v->VTotal[k] - v->VActive[k], + v->DynamicMetadataTransmittedBytes[k], + v->DynamicMetadataLinesBeforeActiveRequired[k], + v->Interlace[k], + v->ProgressiveToInterlaceUnitInOPP, + &TSetupPipe, + &TdmbfPipe, + &TdmecPipe, + &TdmsksPipe, + &dummy1, + &dummy2, + &dummy3); + AllowedTimeForUrgentExtraLatency = v->MaximumVStartup[i][j][k] * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe - TdmecPipe + - TdmsksPipe - DynamicMetadataVMExtraLatency[k]; + if (AllowedTimeForUrgentExtraLatency > 0) { + DCFCLKRequiredForPeakBandwidthPerPlane[k] = dml_max( + DCFCLKRequiredForPeakBandwidthPerPlane[k], + ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency); + } else { + DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i]; + } + } + } + DCFCLKRequiredForPeakBandwidth = 0; + for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) { + DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + DCFCLKRequiredForPeakBandwidthPerPlane[k]; + } + MinimumTvmPlus2Tr0 = v->UrgLatency[i] + * (v->GPUVMEnable == true ? + (v->HostVMEnable == true ? + (v->GPUVMMaxPageTableLevels + 2) * (v->HostVMMaxNonCachedPageTableLevels + 1) - 1 : v->GPUVMMaxPageTableLevels + 1) : + 0); + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + double MaximumTvmPlus2Tr0PlusTsw; + MaximumTvmPlus2Tr0PlusTsw = (v->MaximumVStartup[i][j][k] - 2) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k]; + if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) { + DCFCLKRequiredForPeakBandwidth = v->DCFCLKPerState[i]; + } else { + DCFCLKRequiredForPeakBandwidth = dml_max3( + DCFCLKRequiredForPeakBandwidth, + 2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0 - PrefetchPixelLinesTime[k] / 4), + (2 * ExtraLatencyCycles + PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0)); + } + } + v->DCFCLKState[i][j] = dml_min(v->DCFCLKPerState[i], 1.05 * dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth)); + } + } +} + +static void CalculateUnboundedRequestAndCompressedBufferSize( + unsigned int DETBufferSizeInKByte, + int ConfigReturnBufferSizeInKByte, + enum unbounded_requesting_policy UseUnboundedRequestingFinal, + int TotalActiveDPP, + bool NoChromaPlanes, + int MaxNumDPP, + int CompressedBufferSegmentSizeInkByteFinal, + enum output_encoder_class *Output, + bool *UnboundedRequestEnabled, + int *CompressedBufferSizeInkByte) +{ + double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64); + + *UnboundedRequestEnabled = UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaPlanes, Output[0]); + *CompressedBufferSizeInkByte = ( + *UnboundedRequestEnabled == true ? + ConfigReturnBufferSizeInKByte - TotalActiveDPP * actDETBufferSizeInKByte : + ConfigReturnBufferSizeInKByte - MaxNumDPP * actDETBufferSizeInKByte); + *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP); + dml_print("DML::%s: DETBufferSizeInKByte = %d\n", __func__, DETBufferSizeInKByte); + dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte); + dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal); + dml_print("DML::%s: actDETBufferSizeInKByte = %f\n", __func__, actDETBufferSizeInKByte); + dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled); + dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte); +#endif +} + +static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output) +{ + bool ret_val = false; + + ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable && TotalNumberOfActiveDPP == 1 && NoChroma); + if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp) { + ret_val = false; + } + return (ret_val); +} + diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.h b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.h new file mode 100644 index 000000000..90be612f2 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.h @@ -0,0 +1,43 @@ +/* + * Copyright 2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DML31_DISPLAY_MODE_VBA_H__ +#define __DML31_DISPLAY_MODE_VBA_H__ + +void dml31_recalculate(struct display_mode_lib *mode_lib); +void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib); +double dml31_CalculateWriteBackDISPCLK( + enum source_format_class WritebackPixelFormat, + double PixelClock, + double WritebackHRatio, + double WritebackVRatio, + unsigned int WritebackHTaps, + unsigned int WritebackVTaps, + long WritebackSourceWidth, + long WritebackDestinationWidth, + unsigned int HTotal, + unsigned int WritebackLineBufferSize); + +#endif /* __DML31_DISPLAY_MODE_VBA_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c new file mode 100644 index 000000000..d7ee26b62 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c @@ -0,0 +1,1618 @@ +/* + * Copyright 2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#include "../display_mode_lib.h" +#include "../display_mode_vba.h" +#include "../dml_inline_defs.h" +#include "display_rq_dlg_calc_31.h" +#include "../dcn30/display_mode_vba_30.h" + +static bool is_dual_plane(enum source_format_class source_format) +{ + bool ret_val = 0; + + if ((source_format == dm_420_12) || (source_format == dm_420_8) || (source_format == dm_420_10) || (source_format == dm_rgbe_alpha)) + ret_val = 1; + + return ret_val; +} + +static double get_refcyc_per_delivery( + struct display_mode_lib *mode_lib, + double refclk_freq_in_mhz, + double pclk_freq_in_mhz, + unsigned int odm_combine, + unsigned int recout_width, + unsigned int hactive, + double vratio, + double hscale_pixel_rate, + unsigned int delivery_width, + unsigned int req_per_swath_ub) +{ + double refcyc_per_delivery = 0.0; + + if (vratio <= 1.0) { + if (odm_combine) + refcyc_per_delivery = (double) refclk_freq_in_mhz * (double) ((unsigned int) odm_combine * 2) + * dml_min((double) recout_width, (double) hactive / ((unsigned int) odm_combine * 2)) / pclk_freq_in_mhz / (double) req_per_swath_ub; + else + refcyc_per_delivery = (double) refclk_freq_in_mhz * (double) recout_width / pclk_freq_in_mhz / (double) req_per_swath_ub; + } else { + refcyc_per_delivery = (double) refclk_freq_in_mhz * (double) delivery_width / (double) hscale_pixel_rate / (double) req_per_swath_ub; + } + +#ifdef __DML_RQ_DLG_CALC_DEBUG__ + dml_print("DML_DLG: %s: refclk_freq_in_mhz = %3.2f\n", __func__, refclk_freq_in_mhz); + dml_print("DML_DLG: %s: pclk_freq_in_mhz = %3.2f\n", __func__, pclk_freq_in_mhz); + dml_print("DML_DLG: %s: recout_width = %d\n", __func__, recout_width); + dml_print("DML_DLG: %s: vratio = %3.2f\n", __func__, vratio); + dml_print("DML_DLG: %s: req_per_swath_ub = %d\n", __func__, req_per_swath_ub); + dml_print("DML_DLG: %s: hscale_pixel_rate = %3.2f\n", __func__, hscale_pixel_rate); + dml_print("DML_DLG: %s: delivery_width = %d\n", __func__, delivery_width); + dml_print("DML_DLG: %s: refcyc_per_delivery= %3.2f\n", __func__, refcyc_per_delivery); +#endif + + return refcyc_per_delivery; + +} + +static unsigned int get_blk_size_bytes(const enum source_macro_tile_size tile_size) +{ + if (tile_size == dm_256k_tile) + return (256 * 1024); + else if (tile_size == dm_64k_tile) + return (64 * 1024); + else + return (4 * 1024); +} + +static void extract_rq_sizing_regs(struct display_mode_lib *mode_lib, display_data_rq_regs_st *rq_regs, const display_data_rq_sizing_params_st *rq_sizing) +{ + print__data_rq_sizing_params_st(mode_lib, rq_sizing); + + rq_regs->chunk_size = dml_log2(rq_sizing->chunk_bytes) - 10; + + if (rq_sizing->min_chunk_bytes == 0) + rq_regs->min_chunk_size = 0; + else + rq_regs->min_chunk_size = dml_log2(rq_sizing->min_chunk_bytes) - 8 + 1; + + rq_regs->meta_chunk_size = dml_log2(rq_sizing->meta_chunk_bytes) - 10; + if (rq_sizing->min_meta_chunk_bytes == 0) + rq_regs->min_meta_chunk_size = 0; + else + rq_regs->min_meta_chunk_size = dml_log2(rq_sizing->min_meta_chunk_bytes) - 6 + 1; + + rq_regs->dpte_group_size = dml_log2(rq_sizing->dpte_group_bytes) - 6; + rq_regs->mpte_group_size = dml_log2(rq_sizing->mpte_group_bytes) - 6; +} + +static void extract_rq_regs(struct display_mode_lib *mode_lib, display_rq_regs_st *rq_regs, const display_rq_params_st *rq_param) +{ + unsigned int detile_buf_size_in_bytes = mode_lib->ip.det_buffer_size_kbytes * 1024; + unsigned int detile_buf_plane1_addr = 0; + + extract_rq_sizing_regs(mode_lib, &(rq_regs->rq_regs_l), &rq_param->sizing.rq_l); + + rq_regs->rq_regs_l.pte_row_height_linear = dml_floor(dml_log2(rq_param->dlg.rq_l.dpte_row_height), 1) - 3; + + if (rq_param->yuv420) { + extract_rq_sizing_regs(mode_lib, &(rq_regs->rq_regs_c), &rq_param->sizing.rq_c); + rq_regs->rq_regs_c.pte_row_height_linear = dml_floor(dml_log2(rq_param->dlg.rq_c.dpte_row_height), 1) - 3; + } + + rq_regs->rq_regs_l.swath_height = dml_log2(rq_param->dlg.rq_l.swath_height); + rq_regs->rq_regs_c.swath_height = dml_log2(rq_param->dlg.rq_c.swath_height); + + // FIXME: take the max between luma, chroma chunk size? + // okay for now, as we are setting chunk_bytes to 8kb anyways + if (rq_param->sizing.rq_l.chunk_bytes >= 32 * 1024 || (rq_param->yuv420 && rq_param->sizing.rq_c.chunk_bytes >= 32 * 1024)) { //32kb + rq_regs->drq_expansion_mode = 0; + } else { + rq_regs->drq_expansion_mode = 2; + } + rq_regs->prq_expansion_mode = 1; + rq_regs->mrq_expansion_mode = 1; + rq_regs->crq_expansion_mode = 1; + + // Note: detile_buf_plane1_addr is in unit of 1KB + if (rq_param->yuv420) { + if ((double) rq_param->misc.rq_l.stored_swath_bytes / (double) rq_param->misc.rq_c.stored_swath_bytes <= 1.5) { + detile_buf_plane1_addr = (detile_buf_size_in_bytes / 2.0 / 1024.0); // half to chroma +#ifdef __DML_RQ_DLG_CALC_DEBUG__ + dml_print("DML_DLG: %s: detile_buf_plane1_addr = %0d (1/2 to chroma)\n", __func__, detile_buf_plane1_addr); +#endif + } else { + detile_buf_plane1_addr = dml_round_to_multiple((unsigned int) ((2.0 * detile_buf_size_in_bytes) / 3.0), 1024, 0) / 1024.0; // 2/3 to luma +#ifdef __DML_RQ_DLG_CALC_DEBUG__ + dml_print("DML_DLG: %s: detile_buf_plane1_addr = %0d (1/3 chroma)\n", __func__, detile_buf_plane1_addr); +#endif + } + } + rq_regs->plane1_base_address = detile_buf_plane1_addr; + +#ifdef __DML_RQ_DLG_CALC_DEBUG__ + dml_print("DML_DLG: %s: detile_buf_size_in_bytes = %0d\n", __func__, detile_buf_size_in_bytes); + dml_print("DML_DLG: %s: detile_buf_plane1_addr = %0d\n", __func__, detile_buf_plane1_addr); + dml_print("DML_DLG: %s: plane1_base_address = %0d\n", __func__, rq_regs->plane1_base_address); + dml_print("DML_DLG: %s: rq_l.stored_swath_bytes = %0d\n", __func__, rq_param->misc.rq_l.stored_swath_bytes); + dml_print("DML_DLG: %s: rq_c.stored_swath_bytes = %0d\n", __func__, rq_param->misc.rq_c.stored_swath_bytes); + dml_print("DML_DLG: %s: rq_l.swath_height = %0d\n", __func__, rq_param->dlg.rq_l.swath_height); + dml_print("DML_DLG: %s: rq_c.swath_height = %0d\n", __func__, rq_param->dlg.rq_c.swath_height); +#endif +} + +static void handle_det_buf_split(struct display_mode_lib *mode_lib, display_rq_params_st *rq_param, const display_pipe_source_params_st *pipe_src_param) +{ + unsigned int total_swath_bytes = 0; + unsigned int swath_bytes_l = 0; + unsigned int swath_bytes_c = 0; + unsigned int full_swath_bytes_packed_l = 0; + unsigned int full_swath_bytes_packed_c = 0; + bool req128_l = 0; + bool req128_c = 0; + bool surf_linear = (pipe_src_param->sw_mode == dm_sw_linear); + bool surf_vert = (pipe_src_param->source_scan == dm_vert); + unsigned int log2_swath_height_l = 0; + unsigned int log2_swath_height_c = 0; + unsigned int detile_buf_size_in_bytes = mode_lib->ip.det_buffer_size_kbytes * 1024; + + full_swath_bytes_packed_l = rq_param->misc.rq_l.full_swath_bytes; + full_swath_bytes_packed_c = rq_param->misc.rq_c.full_swath_bytes; + +#ifdef __DML_RQ_DLG_CALC_DEBUG__ + dml_print("DML_DLG: %s: full_swath_bytes_packed_l = %0d\n", __func__, full_swath_bytes_packed_l); + dml_print("DML_DLG: %s: full_swath_bytes_packed_c = %0d\n", __func__, full_swath_bytes_packed_c); +#endif + + if (rq_param->yuv420_10bpc) { + full_swath_bytes_packed_l = dml_round_to_multiple(rq_param->misc.rq_l.full_swath_bytes * 2.0 / 3.0, 256, 1) + 256; + full_swath_bytes_packed_c = dml_round_to_multiple(rq_param->misc.rq_c.full_swath_bytes * 2.0 / 3.0, 256, 1) + 256; +#ifdef __DML_RQ_DLG_CALC_DEBUG__ + dml_print("DML_DLG: %s: full_swath_bytes_packed_l = %0d (3-2 packing)\n", __func__, full_swath_bytes_packed_l); + dml_print("DML_DLG: %s: full_swath_bytes_packed_c = %0d (3-2 packing)\n", __func__, full_swath_bytes_packed_c); +#endif + } + + if (rq_param->yuv420) + total_swath_bytes = 2 * full_swath_bytes_packed_l + 2 * full_swath_bytes_packed_c; + else + total_swath_bytes = 2 * full_swath_bytes_packed_l; + +#ifdef __DML_RQ_DLG_CALC_DEBUG__ + dml_print("DML_DLG: %s: total_swath_bytes = %0d\n", __func__, total_swath_bytes); + dml_print("DML_DLG: %s: detile_buf_size_in_bytes = %0d\n", __func__, detile_buf_size_in_bytes); +#endif + + if (total_swath_bytes <= detile_buf_size_in_bytes) { //full 256b request + req128_l = 0; + req128_c = 0; + swath_bytes_l = full_swath_bytes_packed_l; + swath_bytes_c = full_swath_bytes_packed_c; + } else if (!rq_param->yuv420) { + req128_l = 1; + req128_c = 0; + swath_bytes_c = full_swath_bytes_packed_c; + swath_bytes_l = full_swath_bytes_packed_l / 2; + } else if ((double) full_swath_bytes_packed_l / (double) full_swath_bytes_packed_c < 1.5) { + req128_l = 0; + req128_c = 1; + swath_bytes_l = full_swath_bytes_packed_l; + swath_bytes_c = full_swath_bytes_packed_c / 2; + + total_swath_bytes = 2 * swath_bytes_l + 2 * swath_bytes_c; + + if (total_swath_bytes > detile_buf_size_in_bytes) { + req128_l = 1; + swath_bytes_l = full_swath_bytes_packed_l / 2; + } + } else { + req128_l = 1; + req128_c = 0; + swath_bytes_l = full_swath_bytes_packed_l / 2; + swath_bytes_c = full_swath_bytes_packed_c; + + total_swath_bytes = 2 * swath_bytes_l + 2 * swath_bytes_c; + + if (total_swath_bytes > detile_buf_size_in_bytes) { + req128_c = 1; + swath_bytes_c = full_swath_bytes_packed_c / 2; + } + } + + if (rq_param->yuv420) + total_swath_bytes = 2 * swath_bytes_l + 2 * swath_bytes_c; + else + total_swath_bytes = 2 * swath_bytes_l; + + rq_param->misc.rq_l.stored_swath_bytes = swath_bytes_l; + rq_param->misc.rq_c.stored_swath_bytes = swath_bytes_c; + +#ifdef __DML_RQ_DLG_CALC_DEBUG__ + dml_print("DML_DLG: %s: total_swath_bytes = %0d\n", __func__, total_swath_bytes); + dml_print("DML_DLG: %s: rq_l.stored_swath_bytes = %0d\n", __func__, rq_param->misc.rq_l.stored_swath_bytes); + dml_print("DML_DLG: %s: rq_c.stored_swath_bytes = %0d\n", __func__, rq_param->misc.rq_c.stored_swath_bytes); +#endif + if (surf_linear) { + log2_swath_height_l = 0; + log2_swath_height_c = 0; + } else { + unsigned int swath_height_l; + unsigned int swath_height_c; + + if (!surf_vert) { + swath_height_l = rq_param->misc.rq_l.blk256_height; + swath_height_c = rq_param->misc.rq_c.blk256_height; + } else { + swath_height_l = rq_param->misc.rq_l.blk256_width; + swath_height_c = rq_param->misc.rq_c.blk256_width; + } + + if (swath_height_l > 0) + log2_swath_height_l = dml_log2(swath_height_l); + + if (req128_l && log2_swath_height_l > 0) + log2_swath_height_l -= 1; + + if (swath_height_c > 0) + log2_swath_height_c = dml_log2(swath_height_c); + + if (req128_c && log2_swath_height_c > 0) + log2_swath_height_c -= 1; + } + + rq_param->dlg.rq_l.swath_height = 1 << log2_swath_height_l; + rq_param->dlg.rq_c.swath_height = 1 << log2_swath_height_c; + +#ifdef __DML_RQ_DLG_CALC_DEBUG__ + dml_print("DML_DLG: %s: req128_l = %0d\n", __func__, req128_l); + dml_print("DML_DLG: %s: req128_c = %0d\n", __func__, req128_c); + dml_print("DML_DLG: %s: full_swath_bytes_packed_l = %0d\n", __func__, full_swath_bytes_packed_l); + dml_print("DML_DLG: %s: full_swath_bytes_packed_c = %0d\n", __func__, full_swath_bytes_packed_c); + dml_print("DML_DLG: %s: swath_height luma = %0d\n", __func__, rq_param->dlg.rq_l.swath_height); + dml_print("DML_DLG: %s: swath_height chroma = %0d\n", __func__, rq_param->dlg.rq_c.swath_height); +#endif +} + +static void get_meta_and_pte_attr( + struct display_mode_lib *mode_lib, + display_data_rq_dlg_params_st *rq_dlg_param, + display_data_rq_misc_params_st *rq_misc_param, + display_data_rq_sizing_params_st *rq_sizing_param, + unsigned int vp_width, + unsigned int vp_height, + unsigned int data_pitch, + unsigned int meta_pitch, + unsigned int source_format, + unsigned int tiling, + unsigned int macro_tile_size, + unsigned int source_scan, + unsigned int hostvm_enable, + unsigned int is_chroma, + unsigned int surface_height) +{ + bool surf_linear = (tiling == dm_sw_linear); + bool surf_vert = (source_scan == dm_vert); + + unsigned int bytes_per_element; + unsigned int bytes_per_element_y; + unsigned int bytes_per_element_c; + + unsigned int blk256_width = 0; + unsigned int blk256_height = 0; + + unsigned int blk256_width_y = 0; + unsigned int blk256_height_y = 0; + unsigned int blk256_width_c = 0; + unsigned int blk256_height_c = 0; + unsigned int log2_bytes_per_element; + unsigned int log2_blk256_width; + unsigned int log2_blk256_height; + unsigned int blk_bytes; + unsigned int log2_blk_bytes; + unsigned int log2_blk_height; + unsigned int log2_blk_width; + unsigned int log2_meta_req_bytes; + unsigned int log2_meta_req_height; + unsigned int log2_meta_req_width; + unsigned int meta_req_width; + unsigned int meta_req_height; + unsigned int log2_meta_row_height; + unsigned int meta_row_width_ub; + unsigned int log2_meta_chunk_bytes; + unsigned int log2_meta_chunk_height; + + //full sized meta chunk width in unit of data elements + unsigned int log2_meta_chunk_width; + unsigned int log2_min_meta_chunk_bytes; + unsigned int min_meta_chunk_width; + unsigned int meta_chunk_width; + unsigned int meta_chunk_per_row_int; + unsigned int meta_row_remainder; + unsigned int meta_chunk_threshold; + unsigned int meta_blk_height; + unsigned int meta_surface_bytes; + unsigned int vmpg_bytes; + unsigned int meta_pte_req_per_frame_ub; + unsigned int meta_pte_bytes_per_frame_ub; + const unsigned int log2_vmpg_bytes = dml_log2(mode_lib->soc.gpuvm_min_page_size_bytes); + const bool dual_plane_en = is_dual_plane((enum source_format_class) (source_format)); + const unsigned int dpte_buf_in_pte_reqs = + dual_plane_en ? (is_chroma ? mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma : mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma) : (mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma + + mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma); + + unsigned int log2_vmpg_height = 0; + unsigned int log2_vmpg_width = 0; + unsigned int log2_dpte_req_height_ptes = 0; + unsigned int log2_dpte_req_height = 0; + unsigned int log2_dpte_req_width = 0; + unsigned int log2_dpte_row_height_linear = 0; + unsigned int log2_dpte_row_height = 0; + unsigned int log2_dpte_group_width = 0; + unsigned int dpte_row_width_ub = 0; + unsigned int dpte_req_height = 0; + unsigned int dpte_req_width = 0; + unsigned int dpte_group_width = 0; + unsigned int log2_dpte_group_bytes = 0; + unsigned int log2_dpte_group_length = 0; + double byte_per_pixel_det_y; + double byte_per_pixel_det_c; + + dml30_CalculateBytePerPixelAnd256BBlockSizes( + (enum source_format_class) (source_format), + (enum dm_swizzle_mode) (tiling), + &bytes_per_element_y, + &bytes_per_element_c, + &byte_per_pixel_det_y, + &byte_per_pixel_det_c, + &blk256_height_y, + &blk256_height_c, + &blk256_width_y, + &blk256_width_c); + + if (!is_chroma) { + blk256_width = blk256_width_y; + blk256_height = blk256_height_y; + bytes_per_element = bytes_per_element_y; + } else { + blk256_width = blk256_width_c; + blk256_height = blk256_height_c; + bytes_per_element = bytes_per_element_c; + } + + log2_bytes_per_element = dml_log2(bytes_per_element); + + dml_print("DML_DLG: %s: surf_linear = %d\n", __func__, surf_linear); + dml_print("DML_DLG: %s: surf_vert = %d\n", __func__, surf_vert); + dml_print("DML_DLG: %s: blk256_width = %d\n", __func__, blk256_width); + dml_print("DML_DLG: %s: blk256_height = %d\n", __func__, blk256_height); + + log2_blk256_width = dml_log2((double) blk256_width); + log2_blk256_height = dml_log2((double) blk256_height); + blk_bytes = surf_linear ? 256 : get_blk_size_bytes((enum source_macro_tile_size) macro_tile_size); + log2_blk_bytes = dml_log2((double) blk_bytes); + log2_blk_height = 0; + log2_blk_width = 0; + + // remember log rule + // "+" in log is multiply + // "-" in log is divide + // "/2" is like square root + // blk is vertical biased + if (tiling != dm_sw_linear) + log2_blk_height = log2_blk256_height + dml_ceil((double) (log2_blk_bytes - 8) / 2.0, 1); + else + log2_blk_height = 0; // blk height of 1 + + log2_blk_width = log2_blk_bytes - log2_bytes_per_element - log2_blk_height; + + if (!surf_vert) { + int unsigned temp; + + temp = dml_round_to_multiple(vp_width - 1, blk256_width, 1) + blk256_width; + if (data_pitch < blk256_width) { + dml_print("WARNING: DML_DLG: %s: swath_size calculation ignoring data_pitch=%u < blk256_width=%u\n", __func__, data_pitch, blk256_width); + } else { + if (temp > data_pitch) { + if (data_pitch >= vp_width) + temp = data_pitch; + else + dml_print("WARNING: DML_DLG: %s: swath_size calculation ignoring data_pitch=%u < vp_width=%u\n", __func__, data_pitch, vp_width); + } + } + rq_dlg_param->swath_width_ub = temp; + rq_dlg_param->req_per_swath_ub = temp >> log2_blk256_width; + } else { + int unsigned temp; + + temp = dml_round_to_multiple(vp_height - 1, blk256_height, 1) + blk256_height; + if (surface_height < blk256_height) { + dml_print("WARNING: DML_DLG: %s swath_size calculation ignored surface_height=%u < blk256_height=%u\n", __func__, surface_height, blk256_height); + } else { + if (temp > surface_height) { + if (surface_height >= vp_height) + temp = surface_height; + else + dml_print("WARNING: DML_DLG: %s swath_size calculation ignored surface_height=%u < vp_height=%u\n", __func__, surface_height, vp_height); + } + } + rq_dlg_param->swath_width_ub = temp; + rq_dlg_param->req_per_swath_ub = temp >> log2_blk256_height; + } + + if (!surf_vert) + rq_misc_param->full_swath_bytes = rq_dlg_param->swath_width_ub * blk256_height * bytes_per_element; + else + rq_misc_param->full_swath_bytes = rq_dlg_param->swath_width_ub * blk256_width * bytes_per_element; + + rq_misc_param->blk256_height = blk256_height; + rq_misc_param->blk256_width = blk256_width; + + // ------- + // meta + // ------- + log2_meta_req_bytes = 6; // meta request is 64b and is 8x8byte meta element + + // each 64b meta request for dcn is 8x8 meta elements and + // a meta element covers one 256b block of the data surface. + log2_meta_req_height = log2_blk256_height + 3; // meta req is 8x8 byte, each byte represent 1 blk256 + log2_meta_req_width = log2_meta_req_bytes + 8 - log2_bytes_per_element - log2_meta_req_height; + meta_req_width = 1 << log2_meta_req_width; + meta_req_height = 1 << log2_meta_req_height; + log2_meta_row_height = 0; + meta_row_width_ub = 0; + + // the dimensions of a meta row are meta_row_width x meta_row_height in elements. + // calculate upper bound of the meta_row_width + if (!surf_vert) { + log2_meta_row_height = log2_meta_req_height; + meta_row_width_ub = dml_round_to_multiple(vp_width - 1, meta_req_width, 1) + meta_req_width; + rq_dlg_param->meta_req_per_row_ub = meta_row_width_ub / meta_req_width; + } else { + log2_meta_row_height = log2_meta_req_width; + meta_row_width_ub = dml_round_to_multiple(vp_height - 1, meta_req_height, 1) + meta_req_height; + rq_dlg_param->meta_req_per_row_ub = meta_row_width_ub / meta_req_height; + } + rq_dlg_param->meta_bytes_per_row_ub = rq_dlg_param->meta_req_per_row_ub * 64; + + rq_dlg_param->meta_row_height = 1 << log2_meta_row_height; + + log2_meta_chunk_bytes = dml_log2(rq_sizing_param->meta_chunk_bytes); + log2_meta_chunk_height = log2_meta_row_height; + + //full sized meta chunk width in unit of data elements + log2_meta_chunk_width = log2_meta_chunk_bytes + 8 - log2_bytes_per_element - log2_meta_chunk_height; + log2_min_meta_chunk_bytes = dml_log2(rq_sizing_param->min_meta_chunk_bytes); + min_meta_chunk_width = 1 << (log2_min_meta_chunk_bytes + 8 - log2_bytes_per_element - log2_meta_chunk_height); + meta_chunk_width = 1 << log2_meta_chunk_width; + meta_chunk_per_row_int = (unsigned int) (meta_row_width_ub / meta_chunk_width); + meta_row_remainder = meta_row_width_ub % meta_chunk_width; + meta_chunk_threshold = 0; + meta_blk_height = blk256_height * 64; + meta_surface_bytes = meta_pitch * (dml_round_to_multiple(vp_height - 1, meta_blk_height, 1) + meta_blk_height) * bytes_per_element / 256; + vmpg_bytes = mode_lib->soc.gpuvm_min_page_size_bytes; + meta_pte_req_per_frame_ub = (dml_round_to_multiple(meta_surface_bytes - vmpg_bytes, 8 * vmpg_bytes, 1) + 8 * vmpg_bytes) / (8 * vmpg_bytes); + meta_pte_bytes_per_frame_ub = meta_pte_req_per_frame_ub * 64; //64B mpte request + rq_dlg_param->meta_pte_bytes_per_frame_ub = meta_pte_bytes_per_frame_ub; + + dml_print("DML_DLG: %s: meta_blk_height = %d\n", __func__, meta_blk_height); + dml_print("DML_DLG: %s: meta_surface_bytes = %d\n", __func__, meta_surface_bytes); + dml_print("DML_DLG: %s: meta_pte_req_per_frame_ub = %d\n", __func__, meta_pte_req_per_frame_ub); + dml_print("DML_DLG: %s: meta_pte_bytes_per_frame_ub = %d\n", __func__, meta_pte_bytes_per_frame_ub); + + if (!surf_vert) + meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width; + else + meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height; + + if (meta_row_remainder <= meta_chunk_threshold) + rq_dlg_param->meta_chunks_per_row_ub = meta_chunk_per_row_int + 1; + else + rq_dlg_param->meta_chunks_per_row_ub = meta_chunk_per_row_int + 2; + + // ------ + // dpte + // ------ + if (surf_linear) { + log2_vmpg_height = 0; // one line high + } else { + log2_vmpg_height = (log2_vmpg_bytes - 8) / 2 + log2_blk256_height; + } + log2_vmpg_width = log2_vmpg_bytes - log2_bytes_per_element - log2_vmpg_height; + + // only 3 possible shapes for dpte request in dimensions of ptes: 8x1, 4x2, 2x4. + if (surf_linear) { //one 64B PTE request returns 8 PTEs + log2_dpte_req_height_ptes = 0; + log2_dpte_req_width = log2_vmpg_width + 3; + log2_dpte_req_height = 0; + } else if (log2_blk_bytes == 12) { //4KB tile means 4kB page size + //one 64B req gives 8x1 PTEs for 4KB tile + log2_dpte_req_height_ptes = 0; + log2_dpte_req_width = log2_blk_width + 3; + log2_dpte_req_height = log2_blk_height + 0; + } else if ((log2_blk_bytes >= 16) && (log2_vmpg_bytes == 12)) { // tile block >= 64KB + //two 64B reqs of 2x4 PTEs give 16 PTEs to cover 64KB + log2_dpte_req_height_ptes = 4; + log2_dpte_req_width = log2_blk256_width + 4; // log2_64KB_width + log2_dpte_req_height = log2_blk256_height + 4; // log2_64KB_height + } else { //64KB page size and must 64KB tile block + //one 64B req gives 8x1 PTEs for 64KB tile + log2_dpte_req_height_ptes = 0; + log2_dpte_req_width = log2_blk_width + 3; + log2_dpte_req_height = log2_blk_height + 0; + } + + // The dpte request dimensions in data elements is dpte_req_width x dpte_req_height + // log2_vmpg_width is how much 1 pte represent, now calculating how much a 64b pte req represent + // That depends on the pte shape (i.e. 8x1, 4x2, 2x4) + //log2_dpte_req_height = log2_vmpg_height + log2_dpte_req_height_ptes; + //log2_dpte_req_width = log2_vmpg_width + log2_dpte_req_width_ptes; + dpte_req_height = 1 << log2_dpte_req_height; + dpte_req_width = 1 << log2_dpte_req_width; + + // calculate pitch dpte row buffer can hold + // round the result down to a power of two. + if (surf_linear) { + unsigned int dpte_row_height; + + log2_dpte_row_height_linear = dml_floor(dml_log2(dpte_buf_in_pte_reqs * dpte_req_width / data_pitch), 1); + + dml_print("DML_DLG: %s: is_chroma = %d\n", __func__, is_chroma); + dml_print("DML_DLG: %s: dpte_buf_in_pte_reqs = %d\n", __func__, dpte_buf_in_pte_reqs); + dml_print("DML_DLG: %s: log2_dpte_row_height_linear = %d\n", __func__, log2_dpte_row_height_linear); + + ASSERT(log2_dpte_row_height_linear >= 3); + + if (log2_dpte_row_height_linear > 7) + log2_dpte_row_height_linear = 7; + + log2_dpte_row_height = log2_dpte_row_height_linear; + // For linear, the dpte row is pitch dependent and the pte requests wrap at the pitch boundary. + // the dpte_row_width_ub is the upper bound of data_pitch*dpte_row_height in elements with this unique buffering. + dpte_row_height = 1 << log2_dpte_row_height; + dpte_row_width_ub = dml_round_to_multiple(data_pitch * dpte_row_height - 1, dpte_req_width, 1) + dpte_req_width; + rq_dlg_param->dpte_req_per_row_ub = dpte_row_width_ub / dpte_req_width; + } else { + // the upper bound of the dpte_row_width without dependency on viewport position follows. + // for tiled mode, row height is the same as req height and row store up to vp size upper bound + if (!surf_vert) { + log2_dpte_row_height = log2_dpte_req_height; + dpte_row_width_ub = dml_round_to_multiple(vp_width - 1, dpte_req_width, 1) + dpte_req_width; + rq_dlg_param->dpte_req_per_row_ub = dpte_row_width_ub / dpte_req_width; + } else { + log2_dpte_row_height = (log2_blk_width < log2_dpte_req_width) ? log2_blk_width : log2_dpte_req_width; + dpte_row_width_ub = dml_round_to_multiple(vp_height - 1, dpte_req_height, 1) + dpte_req_height; + rq_dlg_param->dpte_req_per_row_ub = dpte_row_width_ub / dpte_req_height; + } + } + if (log2_blk_bytes >= 16 && log2_vmpg_bytes == 12) // tile block >= 64KB + rq_dlg_param->dpte_bytes_per_row_ub = rq_dlg_param->dpte_req_per_row_ub * 128; //2*64B dpte request + else + rq_dlg_param->dpte_bytes_per_row_ub = rq_dlg_param->dpte_req_per_row_ub * 64; //64B dpte request + + rq_dlg_param->dpte_row_height = 1 << log2_dpte_row_height; + + // the dpte_group_bytes is reduced for the specific case of vertical + // access of a tile surface that has dpte request of 8x1 ptes. + if (hostvm_enable) + rq_sizing_param->dpte_group_bytes = 512; + else { + if (!surf_linear & (log2_dpte_req_height_ptes == 0) & surf_vert) //reduced, in this case, will have page fault within a group + rq_sizing_param->dpte_group_bytes = 512; + else + rq_sizing_param->dpte_group_bytes = 2048; + } + + //since pte request size is 64byte, the number of data pte requests per full sized group is as follows. + log2_dpte_group_bytes = dml_log2(rq_sizing_param->dpte_group_bytes); + log2_dpte_group_length = log2_dpte_group_bytes - 6; //length in 64b requests + + // full sized data pte group width in elements + if (!surf_vert) + log2_dpte_group_width = log2_dpte_group_length + log2_dpte_req_width; + else + log2_dpte_group_width = log2_dpte_group_length + log2_dpte_req_height; + + //But if the tile block >=64KB and the page size is 4KB, then each dPTE request is 2*64B + if ((log2_blk_bytes >= 16) && (log2_vmpg_bytes == 12)) // tile block >= 64KB + log2_dpte_group_width = log2_dpte_group_width - 1; + + dpte_group_width = 1 << log2_dpte_group_width; + + // since dpte groups are only aligned to dpte_req_width and not dpte_group_width, + // the upper bound for the dpte groups per row is as follows. + rq_dlg_param->dpte_groups_per_row_ub = dml_ceil((double) dpte_row_width_ub / dpte_group_width, 1); +} + +static void get_surf_rq_param( + struct display_mode_lib *mode_lib, + display_data_rq_sizing_params_st *rq_sizing_param, + display_data_rq_dlg_params_st *rq_dlg_param, + display_data_rq_misc_params_st *rq_misc_param, + const display_pipe_params_st *pipe_param, + bool is_chroma, + bool is_alpha) +{ + bool mode_422 = 0; + unsigned int vp_width = 0; + unsigned int vp_height = 0; + unsigned int data_pitch = 0; + unsigned int meta_pitch = 0; + unsigned int surface_height = 0; + unsigned int ppe = mode_422 ? 2 : 1; + + // FIXME check if ppe apply for both luma and chroma in 422 case + if (is_chroma | is_alpha) { + vp_width = pipe_param->src.viewport_width_c / ppe; + vp_height = pipe_param->src.viewport_height_c; + data_pitch = pipe_param->src.data_pitch_c; + meta_pitch = pipe_param->src.meta_pitch_c; + surface_height = pipe_param->src.surface_height_y / 2.0; + } else { + vp_width = pipe_param->src.viewport_width / ppe; + vp_height = pipe_param->src.viewport_height; + data_pitch = pipe_param->src.data_pitch; + meta_pitch = pipe_param->src.meta_pitch; + surface_height = pipe_param->src.surface_height_y; + } + + if (pipe_param->dest.odm_combine) { + unsigned int access_dir; + unsigned int full_src_vp_width; + unsigned int hactive_odm; + unsigned int src_hactive_odm; + + access_dir = (pipe_param->src.source_scan == dm_vert); // vp access direction: horizontal or vertical accessed + hactive_odm = pipe_param->dest.hactive / ((unsigned int) pipe_param->dest.odm_combine * 2); + if (is_chroma) { + full_src_vp_width = pipe_param->scale_ratio_depth.hscl_ratio_c * pipe_param->dest.full_recout_width; + src_hactive_odm = pipe_param->scale_ratio_depth.hscl_ratio_c * hactive_odm; + } else { + full_src_vp_width = pipe_param->scale_ratio_depth.hscl_ratio * pipe_param->dest.full_recout_width; + src_hactive_odm = pipe_param->scale_ratio_depth.hscl_ratio * hactive_odm; + } + + if (access_dir == 0) { + vp_width = dml_min(full_src_vp_width, src_hactive_odm); + dml_print("DML_DLG: %s: vp_width = %d\n", __func__, vp_width); + } else { + vp_height = dml_min(full_src_vp_width, src_hactive_odm); + dml_print("DML_DLG: %s: vp_height = %d\n", __func__, vp_height); + + } + dml_print("DML_DLG: %s: full_src_vp_width = %d\n", __func__, full_src_vp_width); + dml_print("DML_DLG: %s: hactive_odm = %d\n", __func__, hactive_odm); + dml_print("DML_DLG: %s: src_hactive_odm = %d\n", __func__, src_hactive_odm); + } + + rq_sizing_param->chunk_bytes = 8192; + + if (is_alpha) { + rq_sizing_param->chunk_bytes = 4096; + } + + if (rq_sizing_param->chunk_bytes == 64 * 1024) + rq_sizing_param->min_chunk_bytes = 0; + else + rq_sizing_param->min_chunk_bytes = 1024; + + rq_sizing_param->meta_chunk_bytes = 2048; + rq_sizing_param->min_meta_chunk_bytes = 256; + + if (pipe_param->src.hostvm) + rq_sizing_param->mpte_group_bytes = 512; + else + rq_sizing_param->mpte_group_bytes = 2048; + + get_meta_and_pte_attr( + mode_lib, + rq_dlg_param, + rq_misc_param, + rq_sizing_param, + vp_width, + vp_height, + data_pitch, + meta_pitch, + pipe_param->src.source_format, + pipe_param->src.sw_mode, + pipe_param->src.macro_tile_size, + pipe_param->src.source_scan, + pipe_param->src.hostvm, + is_chroma, + surface_height); +} + +static void dml_rq_dlg_get_rq_params(struct display_mode_lib *mode_lib, display_rq_params_st *rq_param, const display_pipe_params_st *pipe_param) +{ + // get param for luma surface + rq_param->yuv420 = pipe_param->src.source_format == dm_420_8 || pipe_param->src.source_format == dm_420_10 || pipe_param->src.source_format == dm_rgbe_alpha + || pipe_param->src.source_format == dm_420_12; + + rq_param->yuv420_10bpc = pipe_param->src.source_format == dm_420_10; + + rq_param->rgbe_alpha = (pipe_param->src.source_format == dm_rgbe_alpha) ? 1 : 0; + + get_surf_rq_param(mode_lib, &(rq_param->sizing.rq_l), &(rq_param->dlg.rq_l), &(rq_param->misc.rq_l), pipe_param, 0, 0); + + if (is_dual_plane((enum source_format_class) (pipe_param->src.source_format))) { + // get param for chroma surface + get_surf_rq_param(mode_lib, &(rq_param->sizing.rq_c), &(rq_param->dlg.rq_c), &(rq_param->misc.rq_c), pipe_param, 1, rq_param->rgbe_alpha); + } + + // calculate how to split the det buffer space between luma and chroma + handle_det_buf_split(mode_lib, rq_param, &pipe_param->src); + print__rq_params_st(mode_lib, rq_param); +} + +void dml31_rq_dlg_get_rq_reg(struct display_mode_lib *mode_lib, display_rq_regs_st *rq_regs, const display_pipe_params_st *pipe_param) +{ + display_rq_params_st rq_param = {0}; + + memset(rq_regs, 0, sizeof(*rq_regs)); + dml_rq_dlg_get_rq_params(mode_lib, &rq_param, pipe_param); + extract_rq_regs(mode_lib, rq_regs, &rq_param); + + print__rq_regs_st(mode_lib, rq_regs); +} + +static void calculate_ttu_cursor( + struct display_mode_lib *mode_lib, + double *refcyc_per_req_delivery_pre_cur, + double *refcyc_per_req_delivery_cur, + double refclk_freq_in_mhz, + double ref_freq_to_pix_freq, + double hscale_pixel_rate_l, + double hscl_ratio, + double vratio_pre_l, + double vratio_l, + unsigned int cur_width, + enum cursor_bpp cur_bpp) +{ + unsigned int cur_src_width = cur_width; + unsigned int cur_req_size = 0; + unsigned int cur_req_width = 0; + double cur_width_ub = 0.0; + double cur_req_per_width = 0.0; + double hactive_cur = 0.0; + + ASSERT(cur_src_width <= 256); + + *refcyc_per_req_delivery_pre_cur = 0.0; + *refcyc_per_req_delivery_cur = 0.0; + if (cur_src_width > 0) { + unsigned int cur_bit_per_pixel = 0; + + if (cur_bpp == dm_cur_2bit) { + cur_req_size = 64; // byte + cur_bit_per_pixel = 2; + } else { // 32bit + cur_bit_per_pixel = 32; + if (cur_src_width >= 1 && cur_src_width <= 16) + cur_req_size = 64; + else if (cur_src_width >= 17 && cur_src_width <= 31) + cur_req_size = 128; + else + cur_req_size = 256; + } + + cur_req_width = (double) cur_req_size / ((double) cur_bit_per_pixel / 8.0); + cur_width_ub = dml_ceil((double) cur_src_width / (double) cur_req_width, 1) * (double) cur_req_width; + cur_req_per_width = cur_width_ub / (double) cur_req_width; + hactive_cur = (double) cur_src_width / hscl_ratio; // FIXME: oswin to think about what to do for cursor + + if (vratio_pre_l <= 1.0) { + *refcyc_per_req_delivery_pre_cur = hactive_cur * ref_freq_to_pix_freq / (double) cur_req_per_width; + } else { + *refcyc_per_req_delivery_pre_cur = (double) refclk_freq_in_mhz * (double) cur_src_width / hscale_pixel_rate_l / (double) cur_req_per_width; + } + + ASSERT(*refcyc_per_req_delivery_pre_cur < dml_pow(2, 13)); + + if (vratio_l <= 1.0) { + *refcyc_per_req_delivery_cur = hactive_cur * ref_freq_to_pix_freq / (double) cur_req_per_width; + } else { + *refcyc_per_req_delivery_cur = (double) refclk_freq_in_mhz * (double) cur_src_width / hscale_pixel_rate_l / (double) cur_req_per_width; + } + + dml_print("DML_DLG: %s: cur_req_width = %d\n", __func__, cur_req_width); + dml_print("DML_DLG: %s: cur_width_ub = %3.2f\n", __func__, cur_width_ub); + dml_print("DML_DLG: %s: cur_req_per_width = %3.2f\n", __func__, cur_req_per_width); + dml_print("DML_DLG: %s: hactive_cur = %3.2f\n", __func__, hactive_cur); + dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_cur = %3.2f\n", __func__, *refcyc_per_req_delivery_pre_cur); + dml_print("DML_DLG: %s: refcyc_per_req_delivery_cur = %3.2f\n", __func__, *refcyc_per_req_delivery_cur); + + ASSERT(*refcyc_per_req_delivery_cur < dml_pow(2, 13)); + } +} + +// Note: currently taken in as is. +// Nice to decouple code from hw register implement and extract code that are repeated for luma and chroma. +static void dml_rq_dlg_get_dlg_params( + struct display_mode_lib *mode_lib, + const display_e2e_pipe_params_st *e2e_pipe_param, + const unsigned int num_pipes, + const unsigned int pipe_idx, + display_dlg_regs_st *disp_dlg_regs, + display_ttu_regs_st *disp_ttu_regs, + const display_rq_dlg_params_st *rq_dlg_param, + const display_dlg_sys_params_st *dlg_sys_param, + const bool cstate_en, + const bool pstate_en, + const bool vm_en, + const bool ignore_viewport_pos, + const bool immediate_flip_support) +{ + const display_pipe_source_params_st *src = &e2e_pipe_param[pipe_idx].pipe.src; + const display_pipe_dest_params_st *dst = &e2e_pipe_param[pipe_idx].pipe.dest; + const display_clocks_and_cfg_st *clks = &e2e_pipe_param[pipe_idx].clks_cfg; + const scaler_ratio_depth_st *scl = &e2e_pipe_param[pipe_idx].pipe.scale_ratio_depth; + const scaler_taps_st *taps = &e2e_pipe_param[pipe_idx].pipe.scale_taps; + unsigned int pipe_index_in_combine[DC__NUM_PIPES__MAX]; + + // ------------------------- + // Section 1.15.2.1: OTG dependent Params + // ------------------------- + // Timing + unsigned int htotal = dst->htotal; + unsigned int hblank_end = dst->hblank_end; + unsigned int vblank_start = dst->vblank_start; + unsigned int vblank_end = dst->vblank_end; + + double dppclk_freq_in_mhz = clks->dppclk_mhz; + double refclk_freq_in_mhz = clks->refclk_mhz; + double pclk_freq_in_mhz = dst->pixel_rate_mhz; + bool interlaced = dst->interlaced; + double ref_freq_to_pix_freq = refclk_freq_in_mhz / pclk_freq_in_mhz; + double min_ttu_vblank; + unsigned int dlg_vblank_start; + bool dual_plane; + bool mode_422; + unsigned int access_dir; + unsigned int vp_height_l; + unsigned int vp_width_l; + unsigned int vp_height_c; + unsigned int vp_width_c; + + // Scaling + unsigned int htaps_l; + unsigned int htaps_c; + double hratio_l; + double hratio_c; + double vratio_l; + double vratio_c; + bool scl_enable; + + unsigned int swath_width_ub_l; + unsigned int dpte_groups_per_row_ub_l; + unsigned int swath_width_ub_c; + unsigned int dpte_groups_per_row_ub_c; + + unsigned int meta_chunks_per_row_ub_l; + unsigned int meta_chunks_per_row_ub_c; + unsigned int vupdate_offset; + unsigned int vupdate_width; + unsigned int vready_offset; + + unsigned int vstartup_start; + unsigned int dst_x_after_scaler; + unsigned int dst_y_after_scaler; + double dst_y_prefetch; + double dst_y_per_vm_vblank; + double dst_y_per_row_vblank; + double dst_y_per_vm_flip; + double dst_y_per_row_flip; + double max_dst_y_per_vm_vblank; + double max_dst_y_per_row_vblank; + double vratio_pre_l; + double vratio_pre_c; + unsigned int req_per_swath_ub_l; + unsigned int req_per_swath_ub_c; + unsigned int meta_row_height_l; + unsigned int meta_row_height_c; + unsigned int swath_width_pixels_ub_l; + unsigned int swath_width_pixels_ub_c; + unsigned int scaler_rec_in_width_l; + unsigned int scaler_rec_in_width_c; + unsigned int dpte_row_height_l; + unsigned int dpte_row_height_c; + double hscale_pixel_rate_l; + double hscale_pixel_rate_c; + double min_hratio_fact_l; + double min_hratio_fact_c; + double refcyc_per_line_delivery_pre_l; + double refcyc_per_line_delivery_pre_c; + double refcyc_per_line_delivery_l; + double refcyc_per_line_delivery_c; + + double refcyc_per_req_delivery_pre_l; + double refcyc_per_req_delivery_pre_c; + double refcyc_per_req_delivery_l; + double refcyc_per_req_delivery_c; + + unsigned int full_recout_width; + double refcyc_per_req_delivery_pre_cur0; + double refcyc_per_req_delivery_cur0; + double refcyc_per_req_delivery_pre_cur1; + double refcyc_per_req_delivery_cur1; + int unsigned vba__min_dst_y_next_start = get_min_dst_y_next_start(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // FROM VBA + int unsigned vba__vready_after_vcount0 = get_vready_at_or_after_vsync(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA + + float vba__refcyc_per_line_delivery_pre_l = get_refcyc_per_line_delivery_pre_l_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA + float vba__refcyc_per_line_delivery_l = get_refcyc_per_line_delivery_l_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA + + float vba__refcyc_per_req_delivery_pre_l = get_refcyc_per_req_delivery_pre_l_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA + float vba__refcyc_per_req_delivery_l = get_refcyc_per_req_delivery_l_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA + + memset(disp_dlg_regs, 0, sizeof(*disp_dlg_regs)); + memset(disp_ttu_regs, 0, sizeof(*disp_ttu_regs)); + + dml_print("DML_DLG: %s: cstate_en = %d\n", __func__, cstate_en); + dml_print("DML_DLG: %s: pstate_en = %d\n", __func__, pstate_en); + dml_print("DML_DLG: %s: vm_en = %d\n", __func__, vm_en); + dml_print("DML_DLG: %s: ignore_viewport_pos = %d\n", __func__, ignore_viewport_pos); + dml_print("DML_DLG: %s: immediate_flip_support = %d\n", __func__, immediate_flip_support); + + dml_print("DML_DLG: %s: dppclk_freq_in_mhz = %3.2f\n", __func__, dppclk_freq_in_mhz); + dml_print("DML_DLG: %s: refclk_freq_in_mhz = %3.2f\n", __func__, refclk_freq_in_mhz); + dml_print("DML_DLG: %s: pclk_freq_in_mhz = %3.2f\n", __func__, pclk_freq_in_mhz); + dml_print("DML_DLG: %s: interlaced = %d\n", __func__, interlaced); ASSERT(ref_freq_to_pix_freq < 4.0); + + disp_dlg_regs->ref_freq_to_pix_freq = (unsigned int) (ref_freq_to_pix_freq * dml_pow(2, 19)); + disp_dlg_regs->refcyc_per_htotal = (unsigned int) (ref_freq_to_pix_freq * (double) htotal * dml_pow(2, 8)); + disp_dlg_regs->dlg_vblank_end = interlaced ? (vblank_end / 2) : vblank_end; // 15 bits + + //set_prefetch_mode(mode_lib, cstate_en, pstate_en, ignore_viewport_pos, immediate_flip_support); + min_ttu_vblank = get_min_ttu_vblank_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA + + dlg_vblank_start = interlaced ? (vblank_start / 2) : vblank_start; + disp_dlg_regs->min_dst_y_next_start = (unsigned int) (((double) dlg_vblank_start) * dml_pow(2, 2)); + disp_dlg_regs->min_dst_y_next_start_us = 0; + ASSERT(disp_dlg_regs->min_dst_y_next_start < (unsigned int)dml_pow(2, 18)); + + dml_print("DML_DLG: %s: min_ttu_vblank (us) = %3.2f\n", __func__, min_ttu_vblank); + dml_print("DML_DLG: %s: min_dst_y_next_start = 0x%0x\n", __func__, disp_dlg_regs->min_dst_y_next_start); + dml_print("DML_DLG: %s: dlg_vblank_start = 0x%0x\n", __func__, dlg_vblank_start); + dml_print("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", __func__, ref_freq_to_pix_freq); + dml_print("DML_DLG: %s: vba__min_dst_y_next_start = 0x%0x\n", __func__, vba__min_dst_y_next_start); + + //old_impl_vs_vba_impl("min_dst_y_next_start", dlg_vblank_start, vba__min_dst_y_next_start); + + // ------------------------- + // Section 1.15.2.2: Prefetch, Active and TTU + // ------------------------- + // Prefetch Calc + // Source + dual_plane = is_dual_plane((enum source_format_class) (src->source_format)); + mode_422 = 0; + access_dir = (src->source_scan == dm_vert); // vp access direction: horizontal or vertical accessed + vp_height_l = src->viewport_height; + vp_width_l = src->viewport_width; + vp_height_c = src->viewport_height_c; + vp_width_c = src->viewport_width_c; + + // Scaling + htaps_l = taps->htaps; + htaps_c = taps->htaps_c; + hratio_l = scl->hscl_ratio; + hratio_c = scl->hscl_ratio_c; + vratio_l = scl->vscl_ratio; + vratio_c = scl->vscl_ratio_c; + scl_enable = scl->scl_enable; + + swath_width_ub_l = rq_dlg_param->rq_l.swath_width_ub; + dpte_groups_per_row_ub_l = rq_dlg_param->rq_l.dpte_groups_per_row_ub; + swath_width_ub_c = rq_dlg_param->rq_c.swath_width_ub; + dpte_groups_per_row_ub_c = rq_dlg_param->rq_c.dpte_groups_per_row_ub; + + meta_chunks_per_row_ub_l = rq_dlg_param->rq_l.meta_chunks_per_row_ub; + meta_chunks_per_row_ub_c = rq_dlg_param->rq_c.meta_chunks_per_row_ub; + vupdate_offset = dst->vupdate_offset; + vupdate_width = dst->vupdate_width; + vready_offset = dst->vready_offset; + + vstartup_start = dst->vstartup_start; + if (interlaced) { + if (vstartup_start / 2.0 - (double) (vready_offset + vupdate_width + vupdate_offset) / htotal <= vblank_end / 2.0) + disp_dlg_regs->vready_after_vcount0 = 1; + else + disp_dlg_regs->vready_after_vcount0 = 0; + } else { + if (vstartup_start - (double) (vready_offset + vupdate_width + vupdate_offset) / htotal <= vblank_end) + disp_dlg_regs->vready_after_vcount0 = 1; + else + disp_dlg_regs->vready_after_vcount0 = 0; + } + + dml_print("DML_DLG: %s: vready_after_vcount0 = %d\n", __func__, disp_dlg_regs->vready_after_vcount0); + dml_print("DML_DLG: %s: vba__vready_after_vcount0 = %d\n", __func__, vba__vready_after_vcount0); + //old_impl_vs_vba_impl("vready_after_vcount0", disp_dlg_regs->vready_after_vcount0, vba__vready_after_vcount0); + + if (interlaced) + vstartup_start = vstartup_start / 2; + + dst_x_after_scaler = get_dst_x_after_scaler(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA + dst_y_after_scaler = get_dst_y_after_scaler(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA + + // do some adjustment on the dst_after scaler to account for odm combine mode + dml_print("DML_DLG: %s: input dst_x_after_scaler = %d\n", __func__, dst_x_after_scaler); + dml_print("DML_DLG: %s: input dst_y_after_scaler = %d\n", __func__, dst_y_after_scaler); + + // need to figure out which side of odm combine we're in + if (dst->odm_combine) { + // figure out which pipes go together + bool visited[DC__NUM_PIPES__MAX]; + unsigned int i, j, k; + + for (k = 0; k < num_pipes; ++k) { + visited[k] = false; + pipe_index_in_combine[k] = 0; + } + + for (i = 0; i < num_pipes; i++) { + if (e2e_pipe_param[i].pipe.src.is_hsplit && !visited[i]) { + + unsigned int grp = e2e_pipe_param[i].pipe.src.hsplit_grp; + unsigned int grp_idx = 0; + + for (j = i; j < num_pipes; j++) { + if (e2e_pipe_param[j].pipe.src.hsplit_grp == grp && e2e_pipe_param[j].pipe.src.is_hsplit && !visited[j]) { + pipe_index_in_combine[j] = grp_idx; + dml_print("DML_DLG: %s: pipe[%d] is in grp %d idx %d\n", __func__, j, grp, grp_idx); + grp_idx++; + visited[j] = true; + } + } + } + } + + } + + if (dst->odm_combine == dm_odm_combine_mode_disabled) { + disp_dlg_regs->refcyc_h_blank_end = (unsigned int) ((double) hblank_end * ref_freq_to_pix_freq); + } else { + unsigned int odm_combine_factor = (dst->odm_combine == dm_odm_combine_mode_2to1 ? 2 : 4); // TODO: We should really check that 4to1 is supported before setting it to 4 + unsigned int odm_pipe_index = pipe_index_in_combine[pipe_idx]; + disp_dlg_regs->refcyc_h_blank_end = (unsigned int) (((double) hblank_end + odm_pipe_index * (double) dst->hactive / odm_combine_factor) * ref_freq_to_pix_freq); + } ASSERT(disp_dlg_regs->refcyc_h_blank_end < (unsigned int)dml_pow(2, 13)); + + dml_print("DML_DLG: %s: htotal = %d\n", __func__, htotal); + dml_print("DML_DLG: %s: dst_x_after_scaler[%d] = %d\n", __func__, pipe_idx, dst_x_after_scaler); + dml_print("DML_DLG: %s: dst_y_after_scaler[%d] = %d\n", __func__, pipe_idx, dst_y_after_scaler); + + dst_y_prefetch = get_dst_y_prefetch(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA + dst_y_per_vm_vblank = get_dst_y_per_vm_vblank(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA + dst_y_per_row_vblank = get_dst_y_per_row_vblank(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA + dst_y_per_vm_flip = get_dst_y_per_vm_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA + dst_y_per_row_flip = get_dst_y_per_row_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA + + max_dst_y_per_vm_vblank = 32.0; //U5.2 + max_dst_y_per_row_vblank = 16.0; //U4.2 + + // magic! + if (htotal <= 75) { + max_dst_y_per_vm_vblank = 100.0; + max_dst_y_per_row_vblank = 100.0; + } + + dml_print("DML_DLG: %s: dst_y_prefetch (after rnd) = %3.2f\n", __func__, dst_y_prefetch); + dml_print("DML_DLG: %s: dst_y_per_vm_flip = %3.2f\n", __func__, dst_y_per_vm_flip); + dml_print("DML_DLG: %s: dst_y_per_row_flip = %3.2f\n", __func__, dst_y_per_row_flip); + dml_print("DML_DLG: %s: dst_y_per_vm_vblank = %3.2f\n", __func__, dst_y_per_vm_vblank); + dml_print("DML_DLG: %s: dst_y_per_row_vblank = %3.2f\n", __func__, dst_y_per_row_vblank); + + ASSERT(dst_y_per_vm_vblank < max_dst_y_per_vm_vblank); ASSERT(dst_y_per_row_vblank < max_dst_y_per_row_vblank); + + ASSERT(dst_y_prefetch > (dst_y_per_vm_vblank + dst_y_per_row_vblank)); + + vratio_pre_l = get_vratio_prefetch_l(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA + vratio_pre_c = get_vratio_prefetch_c(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA + + dml_print("DML_DLG: %s: vratio_pre_l = %3.2f\n", __func__, vratio_pre_l); + dml_print("DML_DLG: %s: vratio_pre_c = %3.2f\n", __func__, vratio_pre_c); + + // Active + req_per_swath_ub_l = rq_dlg_param->rq_l.req_per_swath_ub; + req_per_swath_ub_c = rq_dlg_param->rq_c.req_per_swath_ub; + meta_row_height_l = rq_dlg_param->rq_l.meta_row_height; + meta_row_height_c = rq_dlg_param->rq_c.meta_row_height; + swath_width_pixels_ub_l = 0; + swath_width_pixels_ub_c = 0; + scaler_rec_in_width_l = 0; + scaler_rec_in_width_c = 0; + dpte_row_height_l = rq_dlg_param->rq_l.dpte_row_height; + dpte_row_height_c = rq_dlg_param->rq_c.dpte_row_height; + + if (mode_422) { + swath_width_pixels_ub_l = swath_width_ub_l * 2; // *2 for 2 pixel per element + swath_width_pixels_ub_c = swath_width_ub_c * 2; + } else { + swath_width_pixels_ub_l = swath_width_ub_l * 1; + swath_width_pixels_ub_c = swath_width_ub_c * 1; + } + + hscale_pixel_rate_l = 0.; + hscale_pixel_rate_c = 0.; + min_hratio_fact_l = 1.0; + min_hratio_fact_c = 1.0; + + if (hratio_l <= 1) + min_hratio_fact_l = 2.0; + else if (htaps_l <= 6) { + if ((hratio_l * 2.0) > 4.0) + min_hratio_fact_l = 4.0; + else + min_hratio_fact_l = hratio_l * 2.0; + } else { + if (hratio_l > 4.0) + min_hratio_fact_l = 4.0; + else + min_hratio_fact_l = hratio_l; + } + + hscale_pixel_rate_l = min_hratio_fact_l * dppclk_freq_in_mhz; + + dml_print("DML_DLG: %s: hratio_l = %3.2f\n", __func__, hratio_l); + dml_print("DML_DLG: %s: min_hratio_fact_l = %3.2f\n", __func__, min_hratio_fact_l); + dml_print("DML_DLG: %s: hscale_pixel_rate_l = %3.2f\n", __func__, hscale_pixel_rate_l); + + if (hratio_c <= 1) + min_hratio_fact_c = 2.0; + else if (htaps_c <= 6) { + if ((hratio_c * 2.0) > 4.0) + min_hratio_fact_c = 4.0; + else + min_hratio_fact_c = hratio_c * 2.0; + } else { + if (hratio_c > 4.0) + min_hratio_fact_c = 4.0; + else + min_hratio_fact_c = hratio_c; + } + + hscale_pixel_rate_c = min_hratio_fact_c * dppclk_freq_in_mhz; + + refcyc_per_line_delivery_pre_l = 0.; + refcyc_per_line_delivery_pre_c = 0.; + refcyc_per_line_delivery_l = 0.; + refcyc_per_line_delivery_c = 0.; + + refcyc_per_req_delivery_pre_l = 0.; + refcyc_per_req_delivery_pre_c = 0.; + refcyc_per_req_delivery_l = 0.; + refcyc_per_req_delivery_c = 0.; + + full_recout_width = 0; + // In ODM + if (src->is_hsplit) { + // This "hack" is only allowed (and valid) for MPC combine. In ODM + // combine, you MUST specify the full_recout_width...according to Oswin + if (dst->full_recout_width == 0 && !dst->odm_combine) { + dml_print("DML_DLG: %s: Warning: full_recout_width not set in hsplit mode\n", __func__); + full_recout_width = dst->recout_width * 2; // assume half split for dcn1 + } else + full_recout_width = dst->full_recout_width; + } else + full_recout_width = dst->recout_width; + + // As of DCN2, mpc_combine and odm_combine are mutually exclusive + refcyc_per_line_delivery_pre_l = get_refcyc_per_delivery( + mode_lib, + refclk_freq_in_mhz, + pclk_freq_in_mhz, + dst->odm_combine, + full_recout_width, + dst->hactive, + vratio_pre_l, + hscale_pixel_rate_l, + swath_width_pixels_ub_l, + 1); // per line + + refcyc_per_line_delivery_l = get_refcyc_per_delivery( + mode_lib, + refclk_freq_in_mhz, + pclk_freq_in_mhz, + dst->odm_combine, + full_recout_width, + dst->hactive, + vratio_l, + hscale_pixel_rate_l, + swath_width_pixels_ub_l, + 1); // per line + + dml_print("DML_DLG: %s: full_recout_width = %d\n", __func__, full_recout_width); + dml_print("DML_DLG: %s: hscale_pixel_rate_l = %3.2f\n", __func__, hscale_pixel_rate_l); + dml_print("DML_DLG: %s: refcyc_per_line_delivery_pre_l = %3.2f\n", __func__, refcyc_per_line_delivery_pre_l); + dml_print("DML_DLG: %s: refcyc_per_line_delivery_l = %3.2f\n", __func__, refcyc_per_line_delivery_l); + dml_print("DML_DLG: %s: vba__refcyc_per_line_delivery_pre_l = %3.2f\n", __func__, vba__refcyc_per_line_delivery_pre_l); + dml_print("DML_DLG: %s: vba__refcyc_per_line_delivery_l = %3.2f\n", __func__, vba__refcyc_per_line_delivery_l); + + //old_impl_vs_vba_impl("refcyc_per_line_delivery_pre_l", refcyc_per_line_delivery_pre_l, vba__refcyc_per_line_delivery_pre_l); + //old_impl_vs_vba_impl("refcyc_per_line_delivery_l", refcyc_per_line_delivery_l, vba__refcyc_per_line_delivery_l); + + if (dual_plane) { + float vba__refcyc_per_line_delivery_pre_c = get_refcyc_per_line_delivery_pre_c_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA + float vba__refcyc_per_line_delivery_c = get_refcyc_per_line_delivery_c_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA + + refcyc_per_line_delivery_pre_c = get_refcyc_per_delivery( + mode_lib, + refclk_freq_in_mhz, + pclk_freq_in_mhz, + dst->odm_combine, + full_recout_width, + dst->hactive, + vratio_pre_c, + hscale_pixel_rate_c, + swath_width_pixels_ub_c, + 1); // per line + + refcyc_per_line_delivery_c = get_refcyc_per_delivery( + mode_lib, + refclk_freq_in_mhz, + pclk_freq_in_mhz, + dst->odm_combine, + full_recout_width, + dst->hactive, + vratio_c, + hscale_pixel_rate_c, + swath_width_pixels_ub_c, + 1); // per line + + dml_print("DML_DLG: %s: refcyc_per_line_delivery_pre_c = %3.2f\n", __func__, refcyc_per_line_delivery_pre_c); + dml_print("DML_DLG: %s: refcyc_per_line_delivery_c = %3.2f\n", __func__, refcyc_per_line_delivery_c); + dml_print("DML_DLG: %s: vba__refcyc_per_line_delivery_pre_c = %3.2f\n", __func__, vba__refcyc_per_line_delivery_pre_c); + dml_print("DML_DLG: %s: vba__refcyc_per_line_delivery_c = %3.2f\n", __func__, vba__refcyc_per_line_delivery_c); + + //old_impl_vs_vba_impl("refcyc_per_line_delivery_pre_c", refcyc_per_line_delivery_pre_c, vba__refcyc_per_line_delivery_pre_c); + //old_impl_vs_vba_impl("refcyc_per_line_delivery_c", refcyc_per_line_delivery_c, vba__refcyc_per_line_delivery_c); + } + + if (src->dynamic_metadata_enable && src->gpuvm) + disp_dlg_regs->refcyc_per_vm_dmdata = get_refcyc_per_vm_dmdata_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA + + disp_dlg_regs->dmdata_dl_delta = get_dmdata_dl_delta_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA + + // TTU - Luma / Chroma + if (access_dir) { // vertical access + scaler_rec_in_width_l = vp_height_l; + scaler_rec_in_width_c = vp_height_c; + } else { + scaler_rec_in_width_l = vp_width_l; + scaler_rec_in_width_c = vp_width_c; + } + + refcyc_per_req_delivery_pre_l = get_refcyc_per_delivery( + mode_lib, + refclk_freq_in_mhz, + pclk_freq_in_mhz, + dst->odm_combine, + full_recout_width, + dst->hactive, + vratio_pre_l, + hscale_pixel_rate_l, + scaler_rec_in_width_l, + req_per_swath_ub_l); // per req + + refcyc_per_req_delivery_l = get_refcyc_per_delivery( + mode_lib, + refclk_freq_in_mhz, + pclk_freq_in_mhz, + dst->odm_combine, + full_recout_width, + dst->hactive, + vratio_l, + hscale_pixel_rate_l, + scaler_rec_in_width_l, + req_per_swath_ub_l); // per req + + dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_l = %3.2f\n", __func__, refcyc_per_req_delivery_pre_l); + dml_print("DML_DLG: %s: refcyc_per_req_delivery_l = %3.2f\n", __func__, refcyc_per_req_delivery_l); + dml_print("DML_DLG: %s: vba__refcyc_per_req_delivery_pre_l = %3.2f\n", __func__, vba__refcyc_per_req_delivery_pre_l); + dml_print("DML_DLG: %s: vba__refcyc_per_req_delivery_l = %3.2f\n", __func__, vba__refcyc_per_req_delivery_l); + + //old_impl_vs_vba_impl("refcyc_per_req_delivery_pre_l", refcyc_per_req_delivery_pre_l, vba__refcyc_per_req_delivery_pre_l); + //old_impl_vs_vba_impl("refcyc_per_req_delivery_l", refcyc_per_req_delivery_l, vba__refcyc_per_req_delivery_l); + + ASSERT(refcyc_per_req_delivery_pre_l < dml_pow(2, 13)); ASSERT(refcyc_per_req_delivery_l < dml_pow(2, 13)); + + if (dual_plane) { + float vba__refcyc_per_req_delivery_pre_c = get_refcyc_per_req_delivery_pre_c_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA + float vba__refcyc_per_req_delivery_c = get_refcyc_per_req_delivery_c_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA + + refcyc_per_req_delivery_pre_c = get_refcyc_per_delivery( + mode_lib, + refclk_freq_in_mhz, + pclk_freq_in_mhz, + dst->odm_combine, + full_recout_width, + dst->hactive, + vratio_pre_c, + hscale_pixel_rate_c, + scaler_rec_in_width_c, + req_per_swath_ub_c); // per req + refcyc_per_req_delivery_c = get_refcyc_per_delivery( + mode_lib, + refclk_freq_in_mhz, + pclk_freq_in_mhz, + dst->odm_combine, + full_recout_width, + dst->hactive, + vratio_c, + hscale_pixel_rate_c, + scaler_rec_in_width_c, + req_per_swath_ub_c); // per req + + dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_c = %3.2f\n", __func__, refcyc_per_req_delivery_pre_c); + dml_print("DML_DLG: %s: refcyc_per_req_delivery_c = %3.2f\n", __func__, refcyc_per_req_delivery_c); + dml_print("DML_DLG: %s: vba__refcyc_per_req_delivery_pre_c = %3.2f\n", __func__, vba__refcyc_per_req_delivery_pre_c); + dml_print("DML_DLG: %s: vba__refcyc_per_req_delivery_c = %3.2f\n", __func__, vba__refcyc_per_req_delivery_c); + + //old_impl_vs_vba_impl("refcyc_per_req_delivery_pre_c", refcyc_per_req_delivery_pre_c, vba__refcyc_per_req_delivery_pre_c); + //old_impl_vs_vba_impl("refcyc_per_req_delivery_c", refcyc_per_req_delivery_c, vba__refcyc_per_req_delivery_c); + + ASSERT(refcyc_per_req_delivery_pre_c < dml_pow(2, 13)); ASSERT(refcyc_per_req_delivery_c < dml_pow(2, 13)); + } + + // TTU - Cursor + refcyc_per_req_delivery_pre_cur0 = 0.0; + refcyc_per_req_delivery_cur0 = 0.0; + + ASSERT(src->num_cursors <= 1); + + if (src->num_cursors > 0) { + float vba__refcyc_per_req_delivery_pre_cur0; + float vba__refcyc_per_req_delivery_cur0; + + calculate_ttu_cursor( + mode_lib, + &refcyc_per_req_delivery_pre_cur0, + &refcyc_per_req_delivery_cur0, + refclk_freq_in_mhz, + ref_freq_to_pix_freq, + hscale_pixel_rate_l, + scl->hscl_ratio, + vratio_pre_l, + vratio_l, + src->cur0_src_width, + (enum cursor_bpp) (src->cur0_bpp)); + + vba__refcyc_per_req_delivery_pre_cur0 = get_refcyc_per_cursor_req_delivery_pre_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA + vba__refcyc_per_req_delivery_cur0 = get_refcyc_per_cursor_req_delivery_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA + + dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_cur0 = %3.2f\n", __func__, refcyc_per_req_delivery_pre_cur0); + dml_print("DML_DLG: %s: refcyc_per_req_delivery_cur0 = %3.2f\n", __func__, refcyc_per_req_delivery_cur0); + dml_print("DML_DLG: %s: vba__refcyc_per_req_delivery_pre_cur0 = %3.2f\n", __func__, vba__refcyc_per_req_delivery_pre_cur0); + dml_print("DML_DLG: %s: vba__refcyc_per_req_delivery_cur0 = %3.2f\n", __func__, vba__refcyc_per_req_delivery_cur0); + + //old_impl_vs_vba_impl("refcyc_per_req_delivery_pre_cur0", refcyc_per_req_delivery_pre_cur0, vba__refcyc_per_req_delivery_pre_cur0); + //old_impl_vs_vba_impl("refcyc_per_req_delivery_cur0", refcyc_per_req_delivery_cur0, vba__refcyc_per_req_delivery_cur0); + } + + refcyc_per_req_delivery_pre_cur1 = 0.0; + refcyc_per_req_delivery_cur1 = 0.0; + + // TTU - Misc + // all hard-coded + + // Assignment to register structures + disp_dlg_regs->dst_y_after_scaler = dst_y_after_scaler; // in terms of line + ASSERT(disp_dlg_regs->dst_y_after_scaler < (unsigned int)8); + disp_dlg_regs->refcyc_x_after_scaler = dst_x_after_scaler * ref_freq_to_pix_freq; // in terms of refclk + ASSERT(disp_dlg_regs->refcyc_x_after_scaler < (unsigned int)dml_pow(2, 13)); + disp_dlg_regs->dst_y_prefetch = (unsigned int) (dst_y_prefetch * dml_pow(2, 2)); + disp_dlg_regs->dst_y_per_vm_vblank = (unsigned int) (dst_y_per_vm_vblank * dml_pow(2, 2)); + disp_dlg_regs->dst_y_per_row_vblank = (unsigned int) (dst_y_per_row_vblank * dml_pow(2, 2)); + disp_dlg_regs->dst_y_per_vm_flip = (unsigned int) (dst_y_per_vm_flip * dml_pow(2, 2)); + disp_dlg_regs->dst_y_per_row_flip = (unsigned int) (dst_y_per_row_flip * dml_pow(2, 2)); + + disp_dlg_regs->vratio_prefetch = (unsigned int) (vratio_pre_l * dml_pow(2, 19)); + disp_dlg_regs->vratio_prefetch_c = (unsigned int) (vratio_pre_c * dml_pow(2, 19)); + + dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_vblank); + dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_vblank); + dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_flip); + dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_flip); + + // hack for FPGA + if (mode_lib->project == DML_PROJECT_DCN31_FPGA) { + if (disp_dlg_regs->vratio_prefetch >= (unsigned int) dml_pow(2, 22)) { + disp_dlg_regs->vratio_prefetch = (unsigned int) dml_pow(2, 22) - 1; + dml_print("vratio_prefetch exceed the max value, the register field is [21:0]\n"); + } + } + + disp_dlg_regs->refcyc_per_pte_group_vblank_l = (unsigned int) (dst_y_per_row_vblank * (double) htotal * ref_freq_to_pix_freq / (double) dpte_groups_per_row_ub_l); + ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_l < (unsigned int)dml_pow(2, 13)); + + if (dual_plane) { + disp_dlg_regs->refcyc_per_pte_group_vblank_c = (unsigned int) (dst_y_per_row_vblank * (double) htotal * ref_freq_to_pix_freq / (double) dpte_groups_per_row_ub_c); + ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_c < (unsigned int)dml_pow(2, 13)); + } + + disp_dlg_regs->refcyc_per_meta_chunk_vblank_l = (unsigned int) (dst_y_per_row_vblank * (double) htotal * ref_freq_to_pix_freq / (double) meta_chunks_per_row_ub_l); + ASSERT(disp_dlg_regs->refcyc_per_meta_chunk_vblank_l < (unsigned int)dml_pow(2, 13)); + + disp_dlg_regs->refcyc_per_meta_chunk_vblank_c = disp_dlg_regs->refcyc_per_meta_chunk_vblank_l; // dcc for 4:2:0 is not supported in dcn1.0. assigned to be the same as _l for now + + disp_dlg_regs->refcyc_per_pte_group_flip_l = (unsigned int) (dst_y_per_row_flip * htotal * ref_freq_to_pix_freq) / dpte_groups_per_row_ub_l; + disp_dlg_regs->refcyc_per_meta_chunk_flip_l = (unsigned int) (dst_y_per_row_flip * htotal * ref_freq_to_pix_freq) / meta_chunks_per_row_ub_l; + + if (dual_plane) { + disp_dlg_regs->refcyc_per_pte_group_flip_c = (unsigned int) (dst_y_per_row_flip * htotal * ref_freq_to_pix_freq) / dpte_groups_per_row_ub_c; + disp_dlg_regs->refcyc_per_meta_chunk_flip_c = (unsigned int) (dst_y_per_row_flip * htotal * ref_freq_to_pix_freq) / meta_chunks_per_row_ub_c; + } + + disp_dlg_regs->refcyc_per_vm_group_vblank = get_refcyc_per_vm_group_vblank_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA + disp_dlg_regs->refcyc_per_vm_group_flip = get_refcyc_per_vm_group_flip_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA + disp_dlg_regs->refcyc_per_vm_req_vblank = get_refcyc_per_vm_req_vblank_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz * dml_pow(2, 10); // From VBA + disp_dlg_regs->refcyc_per_vm_req_flip = get_refcyc_per_vm_req_flip_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz * dml_pow(2, 10); // From VBA + + // Clamp to max for now + if (disp_dlg_regs->refcyc_per_vm_group_vblank >= (unsigned int) dml_pow(2, 23)) + disp_dlg_regs->refcyc_per_vm_group_vblank = dml_pow(2, 23) - 1; + + if (disp_dlg_regs->refcyc_per_vm_group_flip >= (unsigned int) dml_pow(2, 23)) + disp_dlg_regs->refcyc_per_vm_group_flip = dml_pow(2, 23) - 1; + + if (disp_dlg_regs->refcyc_per_vm_req_vblank >= (unsigned int) dml_pow(2, 23)) + disp_dlg_regs->refcyc_per_vm_req_vblank = dml_pow(2, 23) - 1; + + if (disp_dlg_regs->refcyc_per_vm_req_flip >= (unsigned int) dml_pow(2, 23)) + disp_dlg_regs->refcyc_per_vm_req_flip = dml_pow(2, 23) - 1; + + disp_dlg_regs->dst_y_per_pte_row_nom_l = (unsigned int) ((double) dpte_row_height_l / (double) vratio_l * dml_pow(2, 2)); + ASSERT(disp_dlg_regs->dst_y_per_pte_row_nom_l < (unsigned int)dml_pow(2, 17)); + if (dual_plane) { + disp_dlg_regs->dst_y_per_pte_row_nom_c = (unsigned int) ((double) dpte_row_height_c / (double) vratio_c * dml_pow(2, 2)); + if (disp_dlg_regs->dst_y_per_pte_row_nom_c >= (unsigned int) dml_pow(2, 17)) { + dml_print( + "DML_DLG: %s: Warning dst_y_per_pte_row_nom_c %u larger than supported by register format U15.2 %u\n", + __func__, + disp_dlg_regs->dst_y_per_pte_row_nom_c, + (unsigned int) dml_pow(2, 17) - 1); + } + } + + disp_dlg_regs->dst_y_per_meta_row_nom_l = (unsigned int) ((double) meta_row_height_l / (double) vratio_l * dml_pow(2, 2)); + ASSERT(disp_dlg_regs->dst_y_per_meta_row_nom_l < (unsigned int)dml_pow(2, 17)); + + disp_dlg_regs->dst_y_per_meta_row_nom_c = (unsigned int) ((double) meta_row_height_c / (double) vratio_c * dml_pow(2, 2)); + ASSERT(disp_dlg_regs->dst_y_per_meta_row_nom_c < (unsigned int)dml_pow(2, 17)); + + disp_dlg_regs->refcyc_per_pte_group_nom_l = (unsigned int) ((double) dpte_row_height_l / (double) vratio_l * (double) htotal * ref_freq_to_pix_freq + / (double) dpte_groups_per_row_ub_l); + if (disp_dlg_regs->refcyc_per_pte_group_nom_l >= (unsigned int) dml_pow(2, 23)) + disp_dlg_regs->refcyc_per_pte_group_nom_l = dml_pow(2, 23) - 1; + disp_dlg_regs->refcyc_per_meta_chunk_nom_l = (unsigned int) ((double) meta_row_height_l / (double) vratio_l * (double) htotal * ref_freq_to_pix_freq + / (double) meta_chunks_per_row_ub_l); + if (disp_dlg_regs->refcyc_per_meta_chunk_nom_l >= (unsigned int) dml_pow(2, 23)) + disp_dlg_regs->refcyc_per_meta_chunk_nom_l = dml_pow(2, 23) - 1; + + if (dual_plane) { + disp_dlg_regs->refcyc_per_pte_group_nom_c = (unsigned int) ((double) dpte_row_height_c / (double) vratio_c * (double) htotal * ref_freq_to_pix_freq + / (double) dpte_groups_per_row_ub_c); + if (disp_dlg_regs->refcyc_per_pte_group_nom_c >= (unsigned int) dml_pow(2, 23)) + disp_dlg_regs->refcyc_per_pte_group_nom_c = dml_pow(2, 23) - 1; + + // TODO: Is this the right calculation? Does htotal need to be halved? + disp_dlg_regs->refcyc_per_meta_chunk_nom_c = (unsigned int) ((double) meta_row_height_c / (double) vratio_c * (double) htotal * ref_freq_to_pix_freq + / (double) meta_chunks_per_row_ub_c); + if (disp_dlg_regs->refcyc_per_meta_chunk_nom_c >= (unsigned int) dml_pow(2, 23)) + disp_dlg_regs->refcyc_per_meta_chunk_nom_c = dml_pow(2, 23) - 1; + } + + disp_dlg_regs->refcyc_per_line_delivery_pre_l = (unsigned int) dml_floor(refcyc_per_line_delivery_pre_l, 1); + disp_dlg_regs->refcyc_per_line_delivery_l = (unsigned int) dml_floor(refcyc_per_line_delivery_l, 1); + ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_l < (unsigned int)dml_pow(2, 13)); ASSERT(disp_dlg_regs->refcyc_per_line_delivery_l < (unsigned int)dml_pow(2, 13)); + + disp_dlg_regs->refcyc_per_line_delivery_pre_c = (unsigned int) dml_floor(refcyc_per_line_delivery_pre_c, 1); + disp_dlg_regs->refcyc_per_line_delivery_c = (unsigned int) dml_floor(refcyc_per_line_delivery_c, 1); + ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_c < (unsigned int)dml_pow(2, 13)); ASSERT(disp_dlg_regs->refcyc_per_line_delivery_c < (unsigned int)dml_pow(2, 13)); + + disp_dlg_regs->chunk_hdl_adjust_cur0 = 3; + disp_dlg_regs->dst_y_offset_cur0 = 0; + disp_dlg_regs->chunk_hdl_adjust_cur1 = 3; + disp_dlg_regs->dst_y_offset_cur1 = 0; + + disp_dlg_regs->dst_y_delta_drq_limit = 0x7fff; // off + + disp_ttu_regs->refcyc_per_req_delivery_pre_l = (unsigned int) (refcyc_per_req_delivery_pre_l * dml_pow(2, 10)); + disp_ttu_regs->refcyc_per_req_delivery_l = (unsigned int) (refcyc_per_req_delivery_l * dml_pow(2, 10)); + disp_ttu_regs->refcyc_per_req_delivery_pre_c = (unsigned int) (refcyc_per_req_delivery_pre_c * dml_pow(2, 10)); + disp_ttu_regs->refcyc_per_req_delivery_c = (unsigned int) (refcyc_per_req_delivery_c * dml_pow(2, 10)); + disp_ttu_regs->refcyc_per_req_delivery_pre_cur0 = (unsigned int) (refcyc_per_req_delivery_pre_cur0 * dml_pow(2, 10)); + disp_ttu_regs->refcyc_per_req_delivery_cur0 = (unsigned int) (refcyc_per_req_delivery_cur0 * dml_pow(2, 10)); + disp_ttu_regs->refcyc_per_req_delivery_pre_cur1 = (unsigned int) (refcyc_per_req_delivery_pre_cur1 * dml_pow(2, 10)); + disp_ttu_regs->refcyc_per_req_delivery_cur1 = (unsigned int) (refcyc_per_req_delivery_cur1 * dml_pow(2, 10)); + + disp_ttu_regs->qos_level_low_wm = 0; + ASSERT(disp_ttu_regs->qos_level_low_wm < dml_pow(2, 14)); + + disp_ttu_regs->qos_level_high_wm = (unsigned int) (4.0 * (double) htotal * ref_freq_to_pix_freq); + ASSERT(disp_ttu_regs->qos_level_high_wm < dml_pow(2, 14)); + + disp_ttu_regs->qos_level_flip = 14; + disp_ttu_regs->qos_level_fixed_l = 8; + disp_ttu_regs->qos_level_fixed_c = 8; + disp_ttu_regs->qos_level_fixed_cur0 = 8; + disp_ttu_regs->qos_ramp_disable_l = 0; + disp_ttu_regs->qos_ramp_disable_c = 0; + disp_ttu_regs->qos_ramp_disable_cur0 = 0; + + disp_ttu_regs->min_ttu_vblank = min_ttu_vblank * refclk_freq_in_mhz; + ASSERT(disp_ttu_regs->min_ttu_vblank < dml_pow(2, 24)); + + print__ttu_regs_st(mode_lib, disp_ttu_regs); + print__dlg_regs_st(mode_lib, disp_dlg_regs); +} + +void dml31_rq_dlg_get_dlg_reg( + struct display_mode_lib *mode_lib, + display_dlg_regs_st *dlg_regs, + display_ttu_regs_st *ttu_regs, + const display_e2e_pipe_params_st *e2e_pipe_param, + const unsigned int num_pipes, + const unsigned int pipe_idx, + const bool cstate_en, + const bool pstate_en, + const bool vm_en, + const bool ignore_viewport_pos, + const bool immediate_flip_support) +{ + display_rq_params_st rq_param = {0}; + display_dlg_sys_params_st dlg_sys_param = {0}; + + // Get watermark and Tex. + dlg_sys_param.t_urg_wm_us = get_wm_urgent(mode_lib, e2e_pipe_param, num_pipes); + dlg_sys_param.deepsleep_dcfclk_mhz = get_clk_dcf_deepsleep(mode_lib, e2e_pipe_param, num_pipes); + dlg_sys_param.t_extra_us = get_urgent_extra_latency(mode_lib, e2e_pipe_param, num_pipes); + dlg_sys_param.mem_trip_us = get_wm_memory_trip(mode_lib, e2e_pipe_param, num_pipes); + dlg_sys_param.t_mclk_wm_us = get_wm_dram_clock_change(mode_lib, e2e_pipe_param, num_pipes); + dlg_sys_param.t_sr_wm_us = get_wm_stutter_enter_exit(mode_lib, e2e_pipe_param, num_pipes); + dlg_sys_param.total_flip_bw = get_total_immediate_flip_bw(mode_lib, e2e_pipe_param, num_pipes); + dlg_sys_param.total_flip_bytes = get_total_immediate_flip_bytes(mode_lib, e2e_pipe_param, num_pipes); + + print__dlg_sys_params_st(mode_lib, &dlg_sys_param); + + // system parameter calculation done + + dml_print("DML_DLG: Calculation for pipe[%d] start\n\n", pipe_idx); + dml_rq_dlg_get_rq_params(mode_lib, &rq_param, &e2e_pipe_param[pipe_idx].pipe); + dml_rq_dlg_get_dlg_params( + mode_lib, + e2e_pipe_param, + num_pipes, + pipe_idx, + dlg_regs, + ttu_regs, + &rq_param.dlg, + &dlg_sys_param, + cstate_en, + pstate_en, + vm_en, + ignore_viewport_pos, + immediate_flip_support); + dml_print("DML_DLG: Calculation for pipe[%d] end\n", pipe_idx); +} + diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.h b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.h new file mode 100644 index 000000000..8ee991351 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.h @@ -0,0 +1,69 @@ +/* + * Copyright 2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DML31_DISPLAY_RQ_DLG_CALC_H__ +#define __DML31_DISPLAY_RQ_DLG_CALC_H__ + +#include "../display_rq_dlg_helpers.h" + +struct display_mode_lib; + +// Function: dml_rq_dlg_get_rq_reg +// Main entry point for test to get the register values out of this DML class. +// This function calls <get_rq_param> and <extract_rq_regs> fucntions to calculate +// and then populate the rq_regs struct +// Input: +// pipe_param - pipe source configuration (e.g. vp, pitch, scaling, dest, etc.) +// Output: +// rq_regs - struct that holds all the RQ registers field value. +// See also: <display_rq_regs_st> +void dml31_rq_dlg_get_rq_reg(struct display_mode_lib *mode_lib, + display_rq_regs_st *rq_regs, + const display_pipe_params_st *pipe_param); + +// Function: dml_rq_dlg_get_dlg_reg +// Calculate and return DLG and TTU register struct given the system setting +// Output: +// dlg_regs - output DLG register struct +// ttu_regs - output DLG TTU register struct +// Input: +// e2e_pipe_param - "compacted" array of e2e pipe param struct +// num_pipes - num of active "pipe" or "route" +// pipe_idx - index that identifies the e2e_pipe_param that corresponding to this dlg +// cstate - 0: when calculate min_ttu_vblank it is assumed cstate is not required. 1: Normal mode, cstate is considered. +// Added for legacy or unrealistic timing tests. +void dml31_rq_dlg_get_dlg_reg(struct display_mode_lib *mode_lib, + display_dlg_regs_st *dlg_regs, + display_ttu_regs_st *ttu_regs, + const display_e2e_pipe_params_st *e2e_pipe_param, + const unsigned int num_pipes, + const unsigned int pipe_idx, + const bool cstate_en, + const bool pstate_en, + const bool vm_en, + const bool ignore_viewport_pos, + const bool immediate_flip_support); + +#endif |