diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:49:45 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:49:45 +0000 |
commit | 2c3c1048746a4622d8c89a29670120dc8fab93c4 (patch) | |
tree | 848558de17fb3008cdf4d861b01ac7781903ce39 /drivers/gpu/drm/amd/display/dc/dml/dcn20 | |
parent | Initial commit. (diff) | |
download | linux-2c3c1048746a4622d8c89a29670120dc8fab93c4.tar.xz linux-2c3c1048746a4622d8c89a29670120dc8fab93c4.zip |
Adding upstream version 6.1.76.upstream/6.1.76
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'drivers/gpu/drm/amd/display/dc/dml/dcn20')
10 files changed, 16515 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c new file mode 100644 index 000000000..ce893fe1c --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c @@ -0,0 +1,2495 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#include "resource.h" +#include "clk_mgr.h" +#include "dc_link_dp.h" +#include "dchubbub.h" +#include "dcn20/dcn20_resource.h" +#include "dcn21/dcn21_resource.h" +#include "clk_mgr/dcn21/rn_clk_mgr.h" + +#include "dcn20_fpu.h" + +#define DC_LOGGER_INIT(logger) + +#ifndef MAX +#define MAX(X, Y) ((X) > (Y) ? (X) : (Y)) +#endif +#ifndef MIN +#define MIN(X, Y) ((X) < (Y) ? (X) : (Y)) +#endif + +/* Constant */ +#define LPDDR_MEM_RETRAIN_LATENCY 4.977 /* Number obtained from LPDDR4 Training Counter Requirement doc */ + +/** + * DOC: DCN2x FPU manipulation Overview + * + * The DCN architecture relies on FPU operations, which require special + * compilation flags and the use of kernel_fpu_begin/end functions; ideally, we + * want to avoid spreading FPU access across multiple files. With this idea in + * mind, this file aims to centralize all DCN20 and DCN2.1 (DCN2x) functions + * that require FPU access in a single place. Code in this file follows the + * following code pattern: + * + * 1. Functions that use FPU operations should be isolated in static functions. + * 2. The FPU functions should have the noinline attribute to ensure anything + * that deals with FP register is contained within this call. + * 3. All function that needs to be accessed outside this file requires a + * public interface that not uses any FPU reference. + * 4. Developers **must not** use DC_FP_START/END in this file, but they need + * to ensure that the caller invokes it before access any function available + * in this file. For this reason, public functions in this file must invoke + * dc_assert_fp_enabled(); + * + * Let's expand a little bit more the idea in the code pattern. To fully + * isolate FPU operations in a single place, we must avoid situations where + * compilers spill FP values to registers due to FP enable in a specific C + * file. Note that even if we isolate all FPU functions in a single file and + * call its interface from other files, the compiler might enable the use of + * FPU before we call DC_FP_START. Nevertheless, it is the programmer's + * responsibility to invoke DC_FP_START/END in the correct place. To highlight + * situations where developers forgot to use the FP protection before calling + * the DC FPU interface functions, we introduce a helper that checks if the + * function is invoked under FP protection. If not, it will trigger a kernel + * warning. + */ + +struct _vcs_dpi_ip_params_st dcn2_0_ip = { + .odm_capable = 1, + .gpuvm_enable = 0, + .hostvm_enable = 0, + .gpuvm_max_page_table_levels = 4, + .hostvm_max_page_table_levels = 4, + .hostvm_cached_page_table_levels = 0, + .pte_group_size_bytes = 2048, + .num_dsc = 6, + .rob_buffer_size_kbytes = 168, + .det_buffer_size_kbytes = 164, + .dpte_buffer_size_in_pte_reqs_luma = 84, + .pde_proc_buffer_size_64k_reqs = 48, + .dpp_output_buffer_pixels = 2560, + .opp_output_buffer_lines = 1, + .pixel_chunk_size_kbytes = 8, + .pte_chunk_size_kbytes = 2, + .meta_chunk_size_kbytes = 2, + .writeback_chunk_size_kbytes = 2, + .line_buffer_size_bits = 789504, + .is_line_buffer_bpp_fixed = 0, + .line_buffer_fixed_bpp = 0, + .dcc_supported = true, + .max_line_buffer_lines = 12, + .writeback_luma_buffer_size_kbytes = 12, + .writeback_chroma_buffer_size_kbytes = 8, + .writeback_chroma_line_buffer_width_pixels = 4, + .writeback_max_hscl_ratio = 1, + .writeback_max_vscl_ratio = 1, + .writeback_min_hscl_ratio = 1, + .writeback_min_vscl_ratio = 1, + .writeback_max_hscl_taps = 12, + .writeback_max_vscl_taps = 12, + .writeback_line_buffer_luma_buffer_size = 0, + .writeback_line_buffer_chroma_buffer_size = 14643, + .cursor_buffer_size = 8, + .cursor_chunk_size = 2, + .max_num_otg = 6, + .max_num_dpp = 6, + .max_num_wb = 1, + .max_dchub_pscl_bw_pix_per_clk = 4, + .max_pscl_lb_bw_pix_per_clk = 2, + .max_lb_vscl_bw_pix_per_clk = 4, + .max_vscl_hscl_bw_pix_per_clk = 4, + .max_hscl_ratio = 8, + .max_vscl_ratio = 8, + .hscl_mults = 4, + .vscl_mults = 4, + .max_hscl_taps = 8, + .max_vscl_taps = 8, + .dispclk_ramp_margin_percent = 1, + .underscan_factor = 1.10, + .min_vblank_lines = 32, // + .dppclk_delay_subtotal = 77, // + .dppclk_delay_scl_lb_only = 16, + .dppclk_delay_scl = 50, + .dppclk_delay_cnvc_formatter = 8, + .dppclk_delay_cnvc_cursor = 6, + .dispclk_delay_subtotal = 87, // + .dcfclk_cstate_latency = 10, // SRExitTime + .max_inter_dcn_tile_repeaters = 8, + .xfc_supported = true, + .xfc_fill_bw_overhead_percent = 10.0, + .xfc_fill_constant_bytes = 0, + .number_of_cursors = 1, +}; + +struct _vcs_dpi_ip_params_st dcn2_0_nv14_ip = { + .odm_capable = 1, + .gpuvm_enable = 0, + .hostvm_enable = 0, + .gpuvm_max_page_table_levels = 4, + .hostvm_max_page_table_levels = 4, + .hostvm_cached_page_table_levels = 0, + .num_dsc = 5, + .rob_buffer_size_kbytes = 168, + .det_buffer_size_kbytes = 164, + .dpte_buffer_size_in_pte_reqs_luma = 84, + .dpte_buffer_size_in_pte_reqs_chroma = 42,//todo + .dpp_output_buffer_pixels = 2560, + .opp_output_buffer_lines = 1, + .pixel_chunk_size_kbytes = 8, + .pte_enable = 1, + .max_page_table_levels = 4, + .pte_chunk_size_kbytes = 2, + .meta_chunk_size_kbytes = 2, + .writeback_chunk_size_kbytes = 2, + .line_buffer_size_bits = 789504, + .is_line_buffer_bpp_fixed = 0, + .line_buffer_fixed_bpp = 0, + .dcc_supported = true, + .max_line_buffer_lines = 12, + .writeback_luma_buffer_size_kbytes = 12, + .writeback_chroma_buffer_size_kbytes = 8, + .writeback_chroma_line_buffer_width_pixels = 4, + .writeback_max_hscl_ratio = 1, + .writeback_max_vscl_ratio = 1, + .writeback_min_hscl_ratio = 1, + .writeback_min_vscl_ratio = 1, + .writeback_max_hscl_taps = 12, + .writeback_max_vscl_taps = 12, + .writeback_line_buffer_luma_buffer_size = 0, + .writeback_line_buffer_chroma_buffer_size = 14643, + .cursor_buffer_size = 8, + .cursor_chunk_size = 2, + .max_num_otg = 5, + .max_num_dpp = 5, + .max_num_wb = 1, + .max_dchub_pscl_bw_pix_per_clk = 4, + .max_pscl_lb_bw_pix_per_clk = 2, + .max_lb_vscl_bw_pix_per_clk = 4, + .max_vscl_hscl_bw_pix_per_clk = 4, + .max_hscl_ratio = 8, + .max_vscl_ratio = 8, + .hscl_mults = 4, + .vscl_mults = 4, + .max_hscl_taps = 8, + .max_vscl_taps = 8, + .dispclk_ramp_margin_percent = 1, + .underscan_factor = 1.10, + .min_vblank_lines = 32, // + .dppclk_delay_subtotal = 77, // + .dppclk_delay_scl_lb_only = 16, + .dppclk_delay_scl = 50, + .dppclk_delay_cnvc_formatter = 8, + .dppclk_delay_cnvc_cursor = 6, + .dispclk_delay_subtotal = 87, // + .dcfclk_cstate_latency = 10, // SRExitTime + .max_inter_dcn_tile_repeaters = 8, + .xfc_supported = true, + .xfc_fill_bw_overhead_percent = 10.0, + .xfc_fill_constant_bytes = 0, + .ptoi_supported = 0, + .number_of_cursors = 1, +}; + +struct _vcs_dpi_soc_bounding_box_st dcn2_0_soc = { + /* Defaults that get patched on driver load from firmware. */ + .clock_limits = { + { + .state = 0, + .dcfclk_mhz = 560.0, + .fabricclk_mhz = 560.0, + .dispclk_mhz = 513.0, + .dppclk_mhz = 513.0, + .phyclk_mhz = 540.0, + .socclk_mhz = 560.0, + .dscclk_mhz = 171.0, + .dram_speed_mts = 8960.0, + }, + { + .state = 1, + .dcfclk_mhz = 694.0, + .fabricclk_mhz = 694.0, + .dispclk_mhz = 642.0, + .dppclk_mhz = 642.0, + .phyclk_mhz = 600.0, + .socclk_mhz = 694.0, + .dscclk_mhz = 214.0, + .dram_speed_mts = 11104.0, + }, + { + .state = 2, + .dcfclk_mhz = 875.0, + .fabricclk_mhz = 875.0, + .dispclk_mhz = 734.0, + .dppclk_mhz = 734.0, + .phyclk_mhz = 810.0, + .socclk_mhz = 875.0, + .dscclk_mhz = 245.0, + .dram_speed_mts = 14000.0, + }, + { + .state = 3, + .dcfclk_mhz = 1000.0, + .fabricclk_mhz = 1000.0, + .dispclk_mhz = 1100.0, + .dppclk_mhz = 1100.0, + .phyclk_mhz = 810.0, + .socclk_mhz = 1000.0, + .dscclk_mhz = 367.0, + .dram_speed_mts = 16000.0, + }, + { + .state = 4, + .dcfclk_mhz = 1200.0, + .fabricclk_mhz = 1200.0, + .dispclk_mhz = 1284.0, + .dppclk_mhz = 1284.0, + .phyclk_mhz = 810.0, + .socclk_mhz = 1200.0, + .dscclk_mhz = 428.0, + .dram_speed_mts = 16000.0, + }, + /*Extra state, no dispclk ramping*/ + { + .state = 5, + .dcfclk_mhz = 1200.0, + .fabricclk_mhz = 1200.0, + .dispclk_mhz = 1284.0, + .dppclk_mhz = 1284.0, + .phyclk_mhz = 810.0, + .socclk_mhz = 1200.0, + .dscclk_mhz = 428.0, + .dram_speed_mts = 16000.0, + }, + }, + .num_states = 5, + .sr_exit_time_us = 8.6, + .sr_enter_plus_exit_time_us = 10.9, + .urgent_latency_us = 4.0, + .urgent_latency_pixel_data_only_us = 4.0, + .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, + .urgent_latency_vm_data_only_us = 4.0, + .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096, + .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096, + .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096, + .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 40.0, + .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 40.0, + .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 40.0, + .max_avg_sdp_bw_use_normal_percent = 40.0, + .max_avg_dram_bw_use_normal_percent = 40.0, + .writeback_latency_us = 12.0, + .ideal_dram_bw_after_urgent_percent = 40.0, + .max_request_size_bytes = 256, + .dram_channel_width_bytes = 2, + .fabric_datapath_to_dcn_data_return_bytes = 64, + .dcn_downspread_percent = 0.5, + .downspread_percent = 0.38, + .dram_page_open_time_ns = 50.0, + .dram_rw_turnaround_time_ns = 17.5, + .dram_return_buffer_per_channel_bytes = 8192, + .round_trip_ping_latency_dcfclk_cycles = 131, + .urgent_out_of_order_return_per_channel_bytes = 256, + .channel_interleave_bytes = 256, + .num_banks = 8, + .num_chans = 16, + .vmm_page_size_bytes = 4096, + .dram_clock_change_latency_us = 404.0, + .dummy_pstate_latency_us = 5.0, + .writeback_dram_clock_change_latency_us = 23.0, + .return_bus_width_bytes = 64, + .dispclk_dppclk_vco_speed_mhz = 3850, + .xfc_bus_transport_time_us = 20, + .xfc_xbuf_latency_tolerance_us = 4, + .use_urgent_burst_bw = 0 +}; + +struct _vcs_dpi_soc_bounding_box_st dcn2_0_nv14_soc = { + .clock_limits = { + { + .state = 0, + .dcfclk_mhz = 560.0, + .fabricclk_mhz = 560.0, + .dispclk_mhz = 513.0, + .dppclk_mhz = 513.0, + .phyclk_mhz = 540.0, + .socclk_mhz = 560.0, + .dscclk_mhz = 171.0, + .dram_speed_mts = 8960.0, + }, + { + .state = 1, + .dcfclk_mhz = 694.0, + .fabricclk_mhz = 694.0, + .dispclk_mhz = 642.0, + .dppclk_mhz = 642.0, + .phyclk_mhz = 600.0, + .socclk_mhz = 694.0, + .dscclk_mhz = 214.0, + .dram_speed_mts = 11104.0, + }, + { + .state = 2, + .dcfclk_mhz = 875.0, + .fabricclk_mhz = 875.0, + .dispclk_mhz = 734.0, + .dppclk_mhz = 734.0, + .phyclk_mhz = 810.0, + .socclk_mhz = 875.0, + .dscclk_mhz = 245.0, + .dram_speed_mts = 14000.0, + }, + { + .state = 3, + .dcfclk_mhz = 1000.0, + .fabricclk_mhz = 1000.0, + .dispclk_mhz = 1100.0, + .dppclk_mhz = 1100.0, + .phyclk_mhz = 810.0, + .socclk_mhz = 1000.0, + .dscclk_mhz = 367.0, + .dram_speed_mts = 16000.0, + }, + { + .state = 4, + .dcfclk_mhz = 1200.0, + .fabricclk_mhz = 1200.0, + .dispclk_mhz = 1284.0, + .dppclk_mhz = 1284.0, + .phyclk_mhz = 810.0, + .socclk_mhz = 1200.0, + .dscclk_mhz = 428.0, + .dram_speed_mts = 16000.0, + }, + /*Extra state, no dispclk ramping*/ + { + .state = 5, + .dcfclk_mhz = 1200.0, + .fabricclk_mhz = 1200.0, + .dispclk_mhz = 1284.0, + .dppclk_mhz = 1284.0, + .phyclk_mhz = 810.0, + .socclk_mhz = 1200.0, + .dscclk_mhz = 428.0, + .dram_speed_mts = 16000.0, + }, + }, + .num_states = 5, + .sr_exit_time_us = 11.6, + .sr_enter_plus_exit_time_us = 13.9, + .urgent_latency_us = 4.0, + .urgent_latency_pixel_data_only_us = 4.0, + .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, + .urgent_latency_vm_data_only_us = 4.0, + .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096, + .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096, + .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096, + .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 40.0, + .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 40.0, + .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 40.0, + .max_avg_sdp_bw_use_normal_percent = 40.0, + .max_avg_dram_bw_use_normal_percent = 40.0, + .writeback_latency_us = 12.0, + .ideal_dram_bw_after_urgent_percent = 40.0, + .max_request_size_bytes = 256, + .dram_channel_width_bytes = 2, + .fabric_datapath_to_dcn_data_return_bytes = 64, + .dcn_downspread_percent = 0.5, + .downspread_percent = 0.38, + .dram_page_open_time_ns = 50.0, + .dram_rw_turnaround_time_ns = 17.5, + .dram_return_buffer_per_channel_bytes = 8192, + .round_trip_ping_latency_dcfclk_cycles = 131, + .urgent_out_of_order_return_per_channel_bytes = 256, + .channel_interleave_bytes = 256, + .num_banks = 8, + .num_chans = 8, + .vmm_page_size_bytes = 4096, + .dram_clock_change_latency_us = 404.0, + .dummy_pstate_latency_us = 5.0, + .writeback_dram_clock_change_latency_us = 23.0, + .return_bus_width_bytes = 64, + .dispclk_dppclk_vco_speed_mhz = 3850, + .xfc_bus_transport_time_us = 20, + .xfc_xbuf_latency_tolerance_us = 4, + .use_urgent_burst_bw = 0 +}; + +struct _vcs_dpi_soc_bounding_box_st dcn2_0_nv12_soc = { + .clock_limits = { + { + .state = 0, + .dcfclk_mhz = 560.0, + .fabricclk_mhz = 560.0, + .dispclk_mhz = 513.0, + .dppclk_mhz = 513.0, + .phyclk_mhz = 540.0, + .socclk_mhz = 560.0, + .dscclk_mhz = 171.0, + .dram_speed_mts = 1069.0, + }, + { + .state = 1, + .dcfclk_mhz = 694.0, + .fabricclk_mhz = 694.0, + .dispclk_mhz = 642.0, + .dppclk_mhz = 642.0, + .phyclk_mhz = 600.0, + .socclk_mhz = 694.0, + .dscclk_mhz = 214.0, + .dram_speed_mts = 1324.0, + }, + { + .state = 2, + .dcfclk_mhz = 875.0, + .fabricclk_mhz = 875.0, + .dispclk_mhz = 734.0, + .dppclk_mhz = 734.0, + .phyclk_mhz = 810.0, + .socclk_mhz = 875.0, + .dscclk_mhz = 245.0, + .dram_speed_mts = 1670.0, + }, + { + .state = 3, + .dcfclk_mhz = 1000.0, + .fabricclk_mhz = 1000.0, + .dispclk_mhz = 1100.0, + .dppclk_mhz = 1100.0, + .phyclk_mhz = 810.0, + .socclk_mhz = 1000.0, + .dscclk_mhz = 367.0, + .dram_speed_mts = 2000.0, + }, + { + .state = 4, + .dcfclk_mhz = 1200.0, + .fabricclk_mhz = 1200.0, + .dispclk_mhz = 1284.0, + .dppclk_mhz = 1284.0, + .phyclk_mhz = 810.0, + .socclk_mhz = 1200.0, + .dscclk_mhz = 428.0, + .dram_speed_mts = 2000.0, + }, + { + .state = 5, + .dcfclk_mhz = 1200.0, + .fabricclk_mhz = 1200.0, + .dispclk_mhz = 1284.0, + .dppclk_mhz = 1284.0, + .phyclk_mhz = 810.0, + .socclk_mhz = 1200.0, + .dscclk_mhz = 428.0, + .dram_speed_mts = 2000.0, + }, + }, + + .num_states = 5, + .sr_exit_time_us = 1.9, + .sr_enter_plus_exit_time_us = 4.4, + .urgent_latency_us = 3.0, + .urgent_latency_pixel_data_only_us = 4.0, + .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, + .urgent_latency_vm_data_only_us = 4.0, + .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096, + .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096, + .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096, + .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 40.0, + .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 40.0, + .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 40.0, + .max_avg_sdp_bw_use_normal_percent = 40.0, + .max_avg_dram_bw_use_normal_percent = 40.0, + .writeback_latency_us = 12.0, + .ideal_dram_bw_after_urgent_percent = 40.0, + .max_request_size_bytes = 256, + .dram_channel_width_bytes = 16, + .fabric_datapath_to_dcn_data_return_bytes = 64, + .dcn_downspread_percent = 0.5, + .downspread_percent = 0.5, + .dram_page_open_time_ns = 50.0, + .dram_rw_turnaround_time_ns = 17.5, + .dram_return_buffer_per_channel_bytes = 8192, + .round_trip_ping_latency_dcfclk_cycles = 131, + .urgent_out_of_order_return_per_channel_bytes = 4096, + .channel_interleave_bytes = 256, + .num_banks = 8, + .num_chans = 16, + .vmm_page_size_bytes = 4096, + .dram_clock_change_latency_us = 45.0, + .writeback_dram_clock_change_latency_us = 23.0, + .return_bus_width_bytes = 64, + .dispclk_dppclk_vco_speed_mhz = 3850, + .xfc_bus_transport_time_us = 20, + .xfc_xbuf_latency_tolerance_us = 50, + .use_urgent_burst_bw = 0, +}; + +struct _vcs_dpi_ip_params_st dcn2_1_ip = { + .odm_capable = 1, + .gpuvm_enable = 1, + .hostvm_enable = 1, + .gpuvm_max_page_table_levels = 1, + .hostvm_max_page_table_levels = 4, + .hostvm_cached_page_table_levels = 2, + .num_dsc = 3, + .rob_buffer_size_kbytes = 168, + .det_buffer_size_kbytes = 164, + .dpte_buffer_size_in_pte_reqs_luma = 44, + .dpte_buffer_size_in_pte_reqs_chroma = 42,//todo + .dpp_output_buffer_pixels = 2560, + .opp_output_buffer_lines = 1, + .pixel_chunk_size_kbytes = 8, + .pte_enable = 1, + .max_page_table_levels = 4, + .pte_chunk_size_kbytes = 2, + .meta_chunk_size_kbytes = 2, + .min_meta_chunk_size_bytes = 256, + .writeback_chunk_size_kbytes = 2, + .line_buffer_size_bits = 789504, + .is_line_buffer_bpp_fixed = 0, + .line_buffer_fixed_bpp = 0, + .dcc_supported = true, + .max_line_buffer_lines = 12, + .writeback_luma_buffer_size_kbytes = 12, + .writeback_chroma_buffer_size_kbytes = 8, + .writeback_chroma_line_buffer_width_pixels = 4, + .writeback_max_hscl_ratio = 1, + .writeback_max_vscl_ratio = 1, + .writeback_min_hscl_ratio = 1, + .writeback_min_vscl_ratio = 1, + .writeback_max_hscl_taps = 12, + .writeback_max_vscl_taps = 12, + .writeback_line_buffer_luma_buffer_size = 0, + .writeback_line_buffer_chroma_buffer_size = 14643, + .cursor_buffer_size = 8, + .cursor_chunk_size = 2, + .max_num_otg = 4, + .max_num_dpp = 4, + .max_num_wb = 1, + .max_dchub_pscl_bw_pix_per_clk = 4, + .max_pscl_lb_bw_pix_per_clk = 2, + .max_lb_vscl_bw_pix_per_clk = 4, + .max_vscl_hscl_bw_pix_per_clk = 4, + .max_hscl_ratio = 4, + .max_vscl_ratio = 4, + .hscl_mults = 4, + .vscl_mults = 4, + .max_hscl_taps = 8, + .max_vscl_taps = 8, + .dispclk_ramp_margin_percent = 1, + .underscan_factor = 1.10, + .min_vblank_lines = 32, // + .dppclk_delay_subtotal = 77, // + .dppclk_delay_scl_lb_only = 16, + .dppclk_delay_scl = 50, + .dppclk_delay_cnvc_formatter = 8, + .dppclk_delay_cnvc_cursor = 6, + .dispclk_delay_subtotal = 87, // + .dcfclk_cstate_latency = 10, // SRExitTime + .max_inter_dcn_tile_repeaters = 8, + + .xfc_supported = false, + .xfc_fill_bw_overhead_percent = 10.0, + .xfc_fill_constant_bytes = 0, + .ptoi_supported = 0, + .number_of_cursors = 1, +}; + +struct _vcs_dpi_soc_bounding_box_st dcn2_1_soc = { + .clock_limits = { + { + .state = 0, + .dcfclk_mhz = 400.0, + .fabricclk_mhz = 400.0, + .dispclk_mhz = 600.0, + .dppclk_mhz = 400.00, + .phyclk_mhz = 600.0, + .socclk_mhz = 278.0, + .dscclk_mhz = 205.67, + .dram_speed_mts = 1600.0, + }, + { + .state = 1, + .dcfclk_mhz = 464.52, + .fabricclk_mhz = 800.0, + .dispclk_mhz = 654.55, + .dppclk_mhz = 626.09, + .phyclk_mhz = 600.0, + .socclk_mhz = 278.0, + .dscclk_mhz = 205.67, + .dram_speed_mts = 1600.0, + }, + { + .state = 2, + .dcfclk_mhz = 514.29, + .fabricclk_mhz = 933.0, + .dispclk_mhz = 757.89, + .dppclk_mhz = 685.71, + .phyclk_mhz = 600.0, + .socclk_mhz = 278.0, + .dscclk_mhz = 287.67, + .dram_speed_mts = 1866.0, + }, + { + .state = 3, + .dcfclk_mhz = 576.00, + .fabricclk_mhz = 1067.0, + .dispclk_mhz = 847.06, + .dppclk_mhz = 757.89, + .phyclk_mhz = 600.0, + .socclk_mhz = 715.0, + .dscclk_mhz = 318.334, + .dram_speed_mts = 2134.0, + }, + { + .state = 4, + .dcfclk_mhz = 626.09, + .fabricclk_mhz = 1200.0, + .dispclk_mhz = 900.00, + .dppclk_mhz = 847.06, + .phyclk_mhz = 810.0, + .socclk_mhz = 953.0, + .dscclk_mhz = 489.0, + .dram_speed_mts = 2400.0, + }, + { + .state = 5, + .dcfclk_mhz = 685.71, + .fabricclk_mhz = 1333.0, + .dispclk_mhz = 1028.57, + .dppclk_mhz = 960.00, + .phyclk_mhz = 810.0, + .socclk_mhz = 278.0, + .dscclk_mhz = 287.67, + .dram_speed_mts = 2666.0, + }, + { + .state = 6, + .dcfclk_mhz = 757.89, + .fabricclk_mhz = 1467.0, + .dispclk_mhz = 1107.69, + .dppclk_mhz = 1028.57, + .phyclk_mhz = 810.0, + .socclk_mhz = 715.0, + .dscclk_mhz = 318.334, + .dram_speed_mts = 3200.0, + }, + { + .state = 7, + .dcfclk_mhz = 847.06, + .fabricclk_mhz = 1600.0, + .dispclk_mhz = 1395.0, + .dppclk_mhz = 1285.00, + .phyclk_mhz = 1325.0, + .socclk_mhz = 953.0, + .dscclk_mhz = 489.0, + .dram_speed_mts = 4266.0, + }, + /*Extra state, no dispclk ramping*/ + { + .state = 8, + .dcfclk_mhz = 847.06, + .fabricclk_mhz = 1600.0, + .dispclk_mhz = 1395.0, + .dppclk_mhz = 1285.0, + .phyclk_mhz = 1325.0, + .socclk_mhz = 953.0, + .dscclk_mhz = 489.0, + .dram_speed_mts = 4266.0, + }, + + }, + + .sr_exit_time_us = 12.5, + .sr_enter_plus_exit_time_us = 17.0, + .urgent_latency_us = 4.0, + .urgent_latency_pixel_data_only_us = 4.0, + .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, + .urgent_latency_vm_data_only_us = 4.0, + .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096, + .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096, + .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096, + .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 80.0, + .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 75.0, + .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 40.0, + .max_avg_sdp_bw_use_normal_percent = 60.0, + .max_avg_dram_bw_use_normal_percent = 100.0, + .writeback_latency_us = 12.0, + .max_request_size_bytes = 256, + .dram_channel_width_bytes = 4, + .fabric_datapath_to_dcn_data_return_bytes = 32, + .dcn_downspread_percent = 0.5, + .downspread_percent = 0.38, + .dram_page_open_time_ns = 50.0, + .dram_rw_turnaround_time_ns = 17.5, + .dram_return_buffer_per_channel_bytes = 8192, + .round_trip_ping_latency_dcfclk_cycles = 128, + .urgent_out_of_order_return_per_channel_bytes = 4096, + .channel_interleave_bytes = 256, + .num_banks = 8, + .num_chans = 4, + .vmm_page_size_bytes = 4096, + .dram_clock_change_latency_us = 23.84, + .return_bus_width_bytes = 64, + .dispclk_dppclk_vco_speed_mhz = 3600, + .xfc_bus_transport_time_us = 4, + .xfc_xbuf_latency_tolerance_us = 4, + .use_urgent_burst_bw = 1, + .num_states = 8 +}; + +struct wm_table ddr4_wm_table_gs = { + .entries = { + { + .wm_inst = WM_A, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 7.09, + .sr_enter_plus_exit_time_us = 8.14, + .valid = true, + }, + { + .wm_inst = WM_B, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 10.12, + .sr_enter_plus_exit_time_us = 11.48, + .valid = true, + }, + { + .wm_inst = WM_C, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 10.12, + .sr_enter_plus_exit_time_us = 11.48, + .valid = true, + }, + { + .wm_inst = WM_D, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 10.12, + .sr_enter_plus_exit_time_us = 11.48, + .valid = true, + }, + } +}; + +struct wm_table lpddr4_wm_table_gs = { + .entries = { + { + .wm_inst = WM_A, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 5.32, + .sr_enter_plus_exit_time_us = 6.38, + .valid = true, + }, + { + .wm_inst = WM_B, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 9.82, + .sr_enter_plus_exit_time_us = 11.196, + .valid = true, + }, + { + .wm_inst = WM_C, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 9.89, + .sr_enter_plus_exit_time_us = 11.24, + .valid = true, + }, + { + .wm_inst = WM_D, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 9.748, + .sr_enter_plus_exit_time_us = 11.102, + .valid = true, + }, + } +}; + +struct wm_table lpddr4_wm_table_with_disabled_ppt = { + .entries = { + { + .wm_inst = WM_A, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 8.32, + .sr_enter_plus_exit_time_us = 9.38, + .valid = true, + }, + { + .wm_inst = WM_B, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 9.82, + .sr_enter_plus_exit_time_us = 11.196, + .valid = true, + }, + { + .wm_inst = WM_C, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 9.89, + .sr_enter_plus_exit_time_us = 11.24, + .valid = true, + }, + { + .wm_inst = WM_D, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 9.748, + .sr_enter_plus_exit_time_us = 11.102, + .valid = true, + }, + } +}; + +struct wm_table ddr4_wm_table_rn = { + .entries = { + { + .wm_inst = WM_A, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 11.90, + .sr_enter_plus_exit_time_us = 12.80, + .valid = true, + }, + { + .wm_inst = WM_B, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 13.18, + .sr_enter_plus_exit_time_us = 14.30, + .valid = true, + }, + { + .wm_inst = WM_C, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 13.18, + .sr_enter_plus_exit_time_us = 14.30, + .valid = true, + }, + { + .wm_inst = WM_D, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 13.18, + .sr_enter_plus_exit_time_us = 14.30, + .valid = true, + }, + } +}; + +struct wm_table ddr4_1R_wm_table_rn = { + .entries = { + { + .wm_inst = WM_A, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 13.90, + .sr_enter_plus_exit_time_us = 14.80, + .valid = true, + }, + { + .wm_inst = WM_B, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 13.90, + .sr_enter_plus_exit_time_us = 14.80, + .valid = true, + }, + { + .wm_inst = WM_C, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 13.90, + .sr_enter_plus_exit_time_us = 14.80, + .valid = true, + }, + { + .wm_inst = WM_D, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 13.90, + .sr_enter_plus_exit_time_us = 14.80, + .valid = true, + }, + } +}; + +struct wm_table lpddr4_wm_table_rn = { + .entries = { + { + .wm_inst = WM_A, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 7.32, + .sr_enter_plus_exit_time_us = 8.38, + .valid = true, + }, + { + .wm_inst = WM_B, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 9.82, + .sr_enter_plus_exit_time_us = 11.196, + .valid = true, + }, + { + .wm_inst = WM_C, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 9.89, + .sr_enter_plus_exit_time_us = 11.24, + .valid = true, + }, + { + .wm_inst = WM_D, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 9.748, + .sr_enter_plus_exit_time_us = 11.102, + .valid = true, + }, + } +}; + +void dcn20_populate_dml_writeback_from_context(struct dc *dc, + struct resource_context *res_ctx, + display_e2e_pipe_params_st *pipes) +{ + int pipe_cnt, i; + + dc_assert_fp_enabled(); + + for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { + struct dc_writeback_info *wb_info = &res_ctx->pipe_ctx[i].stream->writeback_info[0]; + + if (!res_ctx->pipe_ctx[i].stream) + continue; + + /* Set writeback information */ + pipes[pipe_cnt].dout.wb_enable = (wb_info->wb_enabled == true) ? 1 : 0; + pipes[pipe_cnt].dout.num_active_wb++; + pipes[pipe_cnt].dout.wb.wb_src_height = wb_info->dwb_params.cnv_params.crop_height; + pipes[pipe_cnt].dout.wb.wb_src_width = wb_info->dwb_params.cnv_params.crop_width; + pipes[pipe_cnt].dout.wb.wb_dst_width = wb_info->dwb_params.dest_width; + pipes[pipe_cnt].dout.wb.wb_dst_height = wb_info->dwb_params.dest_height; + pipes[pipe_cnt].dout.wb.wb_htaps_luma = 1; + pipes[pipe_cnt].dout.wb.wb_vtaps_luma = 1; + pipes[pipe_cnt].dout.wb.wb_htaps_chroma = wb_info->dwb_params.scaler_taps.h_taps_c; + pipes[pipe_cnt].dout.wb.wb_vtaps_chroma = wb_info->dwb_params.scaler_taps.v_taps_c; + pipes[pipe_cnt].dout.wb.wb_hratio = 1.0; + pipes[pipe_cnt].dout.wb.wb_vratio = 1.0; + if (wb_info->dwb_params.out_format == dwb_scaler_mode_yuv420) { + if (wb_info->dwb_params.output_depth == DWB_OUTPUT_PIXEL_DEPTH_8BPC) + pipes[pipe_cnt].dout.wb.wb_pixel_format = dm_420_8; + else + pipes[pipe_cnt].dout.wb.wb_pixel_format = dm_420_10; + } else { + pipes[pipe_cnt].dout.wb.wb_pixel_format = dm_444_32; + } + + pipe_cnt++; + } +} + +void dcn20_fpu_set_wb_arb_params(struct mcif_arb_params *wb_arb_params, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt, int i) +{ + int k; + + dc_assert_fp_enabled(); + + for (k = 0; k < sizeof(wb_arb_params->cli_watermark)/sizeof(wb_arb_params->cli_watermark[0]); k++) { + wb_arb_params->cli_watermark[k] = get_wm_writeback_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + wb_arb_params->pstate_watermark[k] = get_wm_writeback_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + } + wb_arb_params->time_per_pixel = 16.0 * 1000 / (context->res_ctx.pipe_ctx[i].stream->phy_pix_clk / 1000); /* 4 bit fraction, ms */ +} + +static bool is_dtbclk_required(struct dc *dc, struct dc_state *context) +{ + int i; + for (i = 0; i < dc->res_pool->pipe_count; i++) { + if (!context->res_ctx.pipe_ctx[i].stream) + continue; + if (is_dp_128b_132b_signal(&context->res_ctx.pipe_ctx[i])) + return true; + } + return false; +} + +static enum dcn_zstate_support_state decide_zstate_support(struct dc *dc, struct dc_state *context) +{ + int plane_count; + int i; + + plane_count = 0; + for (i = 0; i < dc->res_pool->pipe_count; i++) { + if (context->res_ctx.pipe_ctx[i].plane_state) + plane_count++; + } + + /* + * Z9 and Z10 allowed cases: + * 1. 0 Planes enabled + * 2. single eDP, on link 0, 1 plane and stutter period > 5ms + * Z10 only cases: + * 1. single eDP, on link 0, 1 plane and stutter period >= 5ms + * Z8 cases: + * 1. stutter period sufficient + * Zstate not allowed cases: + * 1. Everything else + */ + if (plane_count == 0) + return DCN_ZSTATE_SUPPORT_ALLOW; + else if (context->stream_count == 1 && context->streams[0]->signal == SIGNAL_TYPE_EDP) { + struct dc_link *link = context->streams[0]->sink->link; + struct dc_stream_status *stream_status = &context->stream_status[0]; + int minmum_z8_residency = dc->debug.minimum_z8_residency_time > 0 ? dc->debug.minimum_z8_residency_time : 1000; + bool allow_z8 = context->bw_ctx.dml.vba.StutterPeriod > (double)minmum_z8_residency; + bool is_pwrseq0 = link->link_index == 0; + + /* Don't support multi-plane configurations */ + if (stream_status->plane_count > 1) + return DCN_ZSTATE_SUPPORT_DISALLOW; + + if (is_pwrseq0 && context->bw_ctx.dml.vba.StutterPeriod > 5000.0) + return DCN_ZSTATE_SUPPORT_ALLOW; + else if (is_pwrseq0 && link->psr_settings.psr_version == DC_PSR_VERSION_1 && !link->panel_config.psr.disable_psr) + return allow_z8 ? DCN_ZSTATE_SUPPORT_ALLOW_Z8_Z10_ONLY : DCN_ZSTATE_SUPPORT_ALLOW_Z10_ONLY; + else + return allow_z8 ? DCN_ZSTATE_SUPPORT_ALLOW_Z8_ONLY : DCN_ZSTATE_SUPPORT_DISALLOW; + } else { + return DCN_ZSTATE_SUPPORT_DISALLOW; + } +} + +void dcn20_calculate_dlg_params( + struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt, + int vlevel) +{ + int i, pipe_idx; + + dc_assert_fp_enabled(); + + /* Writeback MCIF_WB arbitration parameters */ + dc->res_pool->funcs->set_mcif_arb_params(dc, context, pipes, pipe_cnt); + + context->bw_ctx.bw.dcn.clk.dispclk_khz = context->bw_ctx.dml.vba.DISPCLK * 1000; + context->bw_ctx.bw.dcn.clk.dcfclk_khz = context->bw_ctx.dml.vba.DCFCLK * 1000; + context->bw_ctx.bw.dcn.clk.socclk_khz = context->bw_ctx.dml.vba.SOCCLK * 1000; + context->bw_ctx.bw.dcn.clk.dramclk_khz = context->bw_ctx.dml.vba.DRAMSpeed * 1000 / 16; + + if (dc->debug.min_dram_clk_khz > context->bw_ctx.bw.dcn.clk.dramclk_khz) + context->bw_ctx.bw.dcn.clk.dramclk_khz = dc->debug.min_dram_clk_khz; + + context->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz = context->bw_ctx.dml.vba.DCFCLKDeepSleep * 1000; + context->bw_ctx.bw.dcn.clk.fclk_khz = context->bw_ctx.dml.vba.FabricClock * 1000; + context->bw_ctx.bw.dcn.clk.p_state_change_support = + context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] + != dm_dram_clock_change_unsupported; + + /* Pstate change might not be supported by hardware, but it might be + * possible with firmware driven vertical blank stretching. + */ + context->bw_ctx.bw.dcn.clk.p_state_change_support |= context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching; + + context->bw_ctx.bw.dcn.clk.dppclk_khz = 0; + + context->bw_ctx.bw.dcn.clk.dtbclk_en = is_dtbclk_required(dc, context); + + if (context->bw_ctx.bw.dcn.clk.dispclk_khz < dc->debug.min_disp_clk_khz) + context->bw_ctx.bw.dcn.clk.dispclk_khz = dc->debug.min_disp_clk_khz; + + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + if (!context->res_ctx.pipe_ctx[i].stream) + continue; + pipes[pipe_idx].pipe.dest.vstartup_start = get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); + pipes[pipe_idx].pipe.dest.vupdate_offset = get_vupdate_offset(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); + pipes[pipe_idx].pipe.dest.vupdate_width = get_vupdate_width(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); + pipes[pipe_idx].pipe.dest.vready_offset = get_vready_offset(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); + if (context->res_ctx.pipe_ctx[i].stream->mall_stream_config.type == SUBVP_PHANTOM) { + // Phantom pipe requires that DET_SIZE = 0 and no unbounded requests + context->res_ctx.pipe_ctx[i].det_buffer_size_kb = 0; + context->res_ctx.pipe_ctx[i].unbounded_req = false; + } else { + context->res_ctx.pipe_ctx[i].det_buffer_size_kb = context->bw_ctx.dml.ip.det_buffer_size_kbytes; + context->res_ctx.pipe_ctx[i].unbounded_req = pipes[pipe_idx].pipe.src.unbounded_req_mode; + } + if (context->bw_ctx.bw.dcn.clk.dppclk_khz < pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000) + context->bw_ctx.bw.dcn.clk.dppclk_khz = pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000; + context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz = + pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000; + context->res_ctx.pipe_ctx[i].pipe_dlg_param = pipes[pipe_idx].pipe.dest; + pipe_idx++; + } + /*save a original dppclock copy*/ + context->bw_ctx.bw.dcn.clk.bw_dppclk_khz = context->bw_ctx.bw.dcn.clk.dppclk_khz; + context->bw_ctx.bw.dcn.clk.bw_dispclk_khz = context->bw_ctx.bw.dcn.clk.dispclk_khz; + context->bw_ctx.bw.dcn.clk.max_supported_dppclk_khz = context->bw_ctx.dml.soc.clock_limits[vlevel].dppclk_mhz * 1000; + context->bw_ctx.bw.dcn.clk.max_supported_dispclk_khz = context->bw_ctx.dml.soc.clock_limits[vlevel].dispclk_mhz * 1000; + + context->bw_ctx.bw.dcn.compbuf_size_kb = context->bw_ctx.dml.ip.config_return_buffer_size_in_kbytes + - context->bw_ctx.dml.ip.det_buffer_size_kbytes * pipe_idx; + + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + bool cstate_en = context->bw_ctx.dml.vba.PrefetchMode[vlevel][context->bw_ctx.dml.vba.maxMpcComb] != 2; + + if (!context->res_ctx.pipe_ctx[i].stream) + continue; + + if (dc->ctx->dce_version == DCN_VERSION_2_01) + cstate_en = false; + + context->bw_ctx.dml.funcs.rq_dlg_get_dlg_reg(&context->bw_ctx.dml, + &context->res_ctx.pipe_ctx[i].dlg_regs, + &context->res_ctx.pipe_ctx[i].ttu_regs, + pipes, + pipe_cnt, + pipe_idx, + cstate_en, + context->bw_ctx.bw.dcn.clk.p_state_change_support, + false, false, true); + + context->bw_ctx.dml.funcs.rq_dlg_get_rq_reg(&context->bw_ctx.dml, + &context->res_ctx.pipe_ctx[i].rq_regs, + &pipes[pipe_idx].pipe); + pipe_idx++; + } + context->bw_ctx.bw.dcn.clk.zstate_support = decide_zstate_support(dc, context); +} + +static void swizzle_to_dml_params( + enum swizzle_mode_values swizzle, + unsigned int *sw_mode) +{ + switch (swizzle) { + case DC_SW_LINEAR: + *sw_mode = dm_sw_linear; + break; + case DC_SW_4KB_S: + *sw_mode = dm_sw_4kb_s; + break; + case DC_SW_4KB_S_X: + *sw_mode = dm_sw_4kb_s_x; + break; + case DC_SW_4KB_D: + *sw_mode = dm_sw_4kb_d; + break; + case DC_SW_4KB_D_X: + *sw_mode = dm_sw_4kb_d_x; + break; + case DC_SW_64KB_S: + *sw_mode = dm_sw_64kb_s; + break; + case DC_SW_64KB_S_X: + *sw_mode = dm_sw_64kb_s_x; + break; + case DC_SW_64KB_S_T: + *sw_mode = dm_sw_64kb_s_t; + break; + case DC_SW_64KB_D: + *sw_mode = dm_sw_64kb_d; + break; + case DC_SW_64KB_D_X: + *sw_mode = dm_sw_64kb_d_x; + break; + case DC_SW_64KB_D_T: + *sw_mode = dm_sw_64kb_d_t; + break; + case DC_SW_64KB_R_X: + *sw_mode = dm_sw_64kb_r_x; + break; + case DC_SW_VAR_S: + *sw_mode = dm_sw_var_s; + break; + case DC_SW_VAR_S_X: + *sw_mode = dm_sw_var_s_x; + break; + case DC_SW_VAR_D: + *sw_mode = dm_sw_var_d; + break; + case DC_SW_VAR_D_X: + *sw_mode = dm_sw_var_d_x; + break; + case DC_SW_VAR_R_X: + *sw_mode = dm_sw_var_r_x; + break; + default: + ASSERT(0); /* Not supported */ + break; + } +} + +int dcn20_populate_dml_pipes_from_context( + struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + bool fast_validate) +{ + int pipe_cnt, i; + bool synchronized_vblank = true; + struct resource_context *res_ctx = &context->res_ctx; + + dc_assert_fp_enabled(); + + for (i = 0, pipe_cnt = -1; i < dc->res_pool->pipe_count; i++) { + if (!res_ctx->pipe_ctx[i].stream) + continue; + + if (pipe_cnt < 0) { + pipe_cnt = i; + continue; + } + + if (res_ctx->pipe_ctx[pipe_cnt].stream == res_ctx->pipe_ctx[i].stream) + continue; + + if (dc->debug.disable_timing_sync || + (!resource_are_streams_timing_synchronizable( + res_ctx->pipe_ctx[pipe_cnt].stream, + res_ctx->pipe_ctx[i].stream) && + !resource_are_vblanks_synchronizable( + res_ctx->pipe_ctx[pipe_cnt].stream, + res_ctx->pipe_ctx[i].stream))) { + synchronized_vblank = false; + break; + } + } + + for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { + struct dc_crtc_timing *timing = &res_ctx->pipe_ctx[i].stream->timing; + unsigned int v_total; + unsigned int front_porch; + int output_bpc; + struct audio_check aud_check = {0}; + + if (!res_ctx->pipe_ctx[i].stream) + continue; + + v_total = timing->v_total; + front_porch = timing->v_front_porch; + + /* todo: + pipes[pipe_cnt].pipe.src.dynamic_metadata_enable = 0; + pipes[pipe_cnt].pipe.src.dcc = 0; + pipes[pipe_cnt].pipe.src.vm = 0;*/ + + pipes[pipe_cnt].clks_cfg.refclk_mhz = dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000.0; + + pipes[pipe_cnt].dout.dsc_enable = res_ctx->pipe_ctx[i].stream->timing.flags.DSC; + /* todo: rotation?*/ + pipes[pipe_cnt].dout.dsc_slices = res_ctx->pipe_ctx[i].stream->timing.dsc_cfg.num_slices_h; + if (res_ctx->pipe_ctx[i].stream->use_dynamic_meta) { + pipes[pipe_cnt].pipe.src.dynamic_metadata_enable = true; + /* 1/2 vblank */ + pipes[pipe_cnt].pipe.src.dynamic_metadata_lines_before_active = + (v_total - timing->v_addressable + - timing->v_border_top - timing->v_border_bottom) / 2; + /* 36 bytes dp, 32 hdmi */ + pipes[pipe_cnt].pipe.src.dynamic_metadata_xmit_bytes = + dc_is_dp_signal(res_ctx->pipe_ctx[i].stream->signal) ? 36 : 32; + } + pipes[pipe_cnt].pipe.src.dcc = false; + pipes[pipe_cnt].pipe.src.dcc_rate = 1; + pipes[pipe_cnt].pipe.dest.synchronized_vblank_all_planes = synchronized_vblank; + pipes[pipe_cnt].pipe.dest.synchronize_timings = synchronized_vblank; + pipes[pipe_cnt].pipe.dest.hblank_start = timing->h_total - timing->h_front_porch; + pipes[pipe_cnt].pipe.dest.hblank_end = pipes[pipe_cnt].pipe.dest.hblank_start + - timing->h_addressable + - timing->h_border_left + - timing->h_border_right; + pipes[pipe_cnt].pipe.dest.vblank_start = v_total - front_porch; + pipes[pipe_cnt].pipe.dest.vblank_end = pipes[pipe_cnt].pipe.dest.vblank_start + - timing->v_addressable + - timing->v_border_top + - timing->v_border_bottom; + pipes[pipe_cnt].pipe.dest.htotal = timing->h_total; + pipes[pipe_cnt].pipe.dest.vtotal = v_total; + pipes[pipe_cnt].pipe.dest.hactive = + timing->h_addressable + timing->h_border_left + timing->h_border_right; + pipes[pipe_cnt].pipe.dest.vactive = + timing->v_addressable + timing->v_border_top + timing->v_border_bottom; + pipes[pipe_cnt].pipe.dest.interlaced = timing->flags.INTERLACE; + pipes[pipe_cnt].pipe.dest.pixel_rate_mhz = timing->pix_clk_100hz/10000.0; + if (timing->timing_3d_format == TIMING_3D_FORMAT_HW_FRAME_PACKING) + pipes[pipe_cnt].pipe.dest.pixel_rate_mhz *= 2; + pipes[pipe_cnt].pipe.dest.otg_inst = res_ctx->pipe_ctx[i].stream_res.tg->inst; + pipes[pipe_cnt].dout.dp_lanes = 4; + if (res_ctx->pipe_ctx[i].stream->link) + pipes[pipe_cnt].dout.dp_rate = dm_dp_rate_na; + pipes[pipe_cnt].dout.is_virtual = 0; + pipes[pipe_cnt].pipe.dest.vtotal_min = res_ctx->pipe_ctx[i].stream->adjust.v_total_min; + pipes[pipe_cnt].pipe.dest.vtotal_max = res_ctx->pipe_ctx[i].stream->adjust.v_total_max; + switch (get_num_odm_splits(&res_ctx->pipe_ctx[i])) { + case 1: + pipes[pipe_cnt].pipe.dest.odm_combine = dm_odm_combine_mode_2to1; + break; + case 3: + pipes[pipe_cnt].pipe.dest.odm_combine = dm_odm_combine_mode_4to1; + break; + default: + pipes[pipe_cnt].pipe.dest.odm_combine = dm_odm_combine_mode_disabled; + } + pipes[pipe_cnt].pipe.src.hsplit_grp = res_ctx->pipe_ctx[i].pipe_idx; + if (res_ctx->pipe_ctx[i].top_pipe && res_ctx->pipe_ctx[i].top_pipe->plane_state + == res_ctx->pipe_ctx[i].plane_state) { + struct pipe_ctx *first_pipe = res_ctx->pipe_ctx[i].top_pipe; + int split_idx = 0; + + while (first_pipe->top_pipe && first_pipe->top_pipe->plane_state + == res_ctx->pipe_ctx[i].plane_state) { + first_pipe = first_pipe->top_pipe; + split_idx++; + } + /* Treat 4to1 mpc combine as an mpo of 2 2-to-1 combines */ + if (split_idx == 0) + pipes[pipe_cnt].pipe.src.hsplit_grp = first_pipe->pipe_idx; + else if (split_idx == 1) + pipes[pipe_cnt].pipe.src.hsplit_grp = res_ctx->pipe_ctx[i].pipe_idx; + else if (split_idx == 2) + pipes[pipe_cnt].pipe.src.hsplit_grp = res_ctx->pipe_ctx[i].top_pipe->pipe_idx; + } else if (res_ctx->pipe_ctx[i].prev_odm_pipe) { + struct pipe_ctx *first_pipe = res_ctx->pipe_ctx[i].prev_odm_pipe; + + while (first_pipe->prev_odm_pipe) + first_pipe = first_pipe->prev_odm_pipe; + pipes[pipe_cnt].pipe.src.hsplit_grp = first_pipe->pipe_idx; + } + + switch (res_ctx->pipe_ctx[i].stream->signal) { + case SIGNAL_TYPE_DISPLAY_PORT_MST: + case SIGNAL_TYPE_DISPLAY_PORT: + pipes[pipe_cnt].dout.output_type = dm_dp; + break; + case SIGNAL_TYPE_EDP: + pipes[pipe_cnt].dout.output_type = dm_edp; + break; + case SIGNAL_TYPE_HDMI_TYPE_A: + case SIGNAL_TYPE_DVI_SINGLE_LINK: + case SIGNAL_TYPE_DVI_DUAL_LINK: + pipes[pipe_cnt].dout.output_type = dm_hdmi; + break; + default: + /* In case there is no signal, set dp with 4 lanes to allow max config */ + pipes[pipe_cnt].dout.is_virtual = 1; + pipes[pipe_cnt].dout.output_type = dm_dp; + pipes[pipe_cnt].dout.dp_lanes = 4; + pipes[pipe_cnt].dout.dp_rate = dm_dp_rate_hbr2; + } + + switch (res_ctx->pipe_ctx[i].stream->timing.display_color_depth) { + case COLOR_DEPTH_666: + output_bpc = 6; + break; + case COLOR_DEPTH_888: + output_bpc = 8; + break; + case COLOR_DEPTH_101010: + output_bpc = 10; + break; + case COLOR_DEPTH_121212: + output_bpc = 12; + break; + case COLOR_DEPTH_141414: + output_bpc = 14; + break; + case COLOR_DEPTH_161616: + output_bpc = 16; + break; + case COLOR_DEPTH_999: + output_bpc = 9; + break; + case COLOR_DEPTH_111111: + output_bpc = 11; + break; + default: + output_bpc = 8; + break; + } + + switch (res_ctx->pipe_ctx[i].stream->timing.pixel_encoding) { + case PIXEL_ENCODING_RGB: + case PIXEL_ENCODING_YCBCR444: + pipes[pipe_cnt].dout.output_format = dm_444; + pipes[pipe_cnt].dout.output_bpp = output_bpc * 3; + break; + case PIXEL_ENCODING_YCBCR420: + pipes[pipe_cnt].dout.output_format = dm_420; + pipes[pipe_cnt].dout.output_bpp = (output_bpc * 3.0) / 2; + break; + case PIXEL_ENCODING_YCBCR422: + if (res_ctx->pipe_ctx[i].stream->timing.flags.DSC && + !res_ctx->pipe_ctx[i].stream->timing.dsc_cfg.ycbcr422_simple) + pipes[pipe_cnt].dout.output_format = dm_n422; + else + pipes[pipe_cnt].dout.output_format = dm_s422; + pipes[pipe_cnt].dout.output_bpp = output_bpc * 2; + break; + default: + pipes[pipe_cnt].dout.output_format = dm_444; + pipes[pipe_cnt].dout.output_bpp = output_bpc * 3; + } + + if (res_ctx->pipe_ctx[i].stream->timing.flags.DSC) + pipes[pipe_cnt].dout.output_bpp = res_ctx->pipe_ctx[i].stream->timing.dsc_cfg.bits_per_pixel / 16.0; + + /* todo: default max for now, until there is logic reflecting this in dc*/ + pipes[pipe_cnt].dout.dsc_input_bpc = 12; + /*fill up the audio sample rate (unit in kHz)*/ + get_audio_check(&res_ctx->pipe_ctx[i].stream->audio_info, &aud_check); + pipes[pipe_cnt].dout.max_audio_sample_rate = aud_check.max_audiosample_rate / 1000; + /* + * For graphic plane, cursor number is 1, nv12 is 0 + * bw calculations due to cursor on/off + */ + if (res_ctx->pipe_ctx[i].plane_state && + (res_ctx->pipe_ctx[i].plane_state->address.type == PLN_ADDR_TYPE_VIDEO_PROGRESSIVE || + res_ctx->pipe_ctx[i].stream->mall_stream_config.type == SUBVP_PHANTOM)) + pipes[pipe_cnt].pipe.src.num_cursors = 0; + else + pipes[pipe_cnt].pipe.src.num_cursors = dc->dml.ip.number_of_cursors; + + pipes[pipe_cnt].pipe.src.cur0_src_width = 256; + pipes[pipe_cnt].pipe.src.cur0_bpp = dm_cur_32bit; + + if (!res_ctx->pipe_ctx[i].plane_state) { + pipes[pipe_cnt].pipe.src.is_hsplit = pipes[pipe_cnt].pipe.dest.odm_combine != dm_odm_combine_mode_disabled; + pipes[pipe_cnt].pipe.src.source_scan = dm_horz; + pipes[pipe_cnt].pipe.src.source_rotation = dm_rotation_0; + pipes[pipe_cnt].pipe.src.sw_mode = dm_sw_4kb_s; + pipes[pipe_cnt].pipe.src.macro_tile_size = dm_64k_tile; + pipes[pipe_cnt].pipe.src.viewport_width = timing->h_addressable; + if (pipes[pipe_cnt].pipe.src.viewport_width > 1920) + pipes[pipe_cnt].pipe.src.viewport_width = 1920; + pipes[pipe_cnt].pipe.src.viewport_height = timing->v_addressable; + if (pipes[pipe_cnt].pipe.src.viewport_height > 1080) + pipes[pipe_cnt].pipe.src.viewport_height = 1080; + pipes[pipe_cnt].pipe.src.surface_height_y = pipes[pipe_cnt].pipe.src.viewport_height; + pipes[pipe_cnt].pipe.src.surface_width_y = pipes[pipe_cnt].pipe.src.viewport_width; + pipes[pipe_cnt].pipe.src.surface_height_c = pipes[pipe_cnt].pipe.src.viewport_height; + pipes[pipe_cnt].pipe.src.surface_width_c = pipes[pipe_cnt].pipe.src.viewport_width; + pipes[pipe_cnt].pipe.src.data_pitch = ((pipes[pipe_cnt].pipe.src.viewport_width + 255) / 256) * 256; + pipes[pipe_cnt].pipe.src.source_format = dm_444_32; + pipes[pipe_cnt].pipe.dest.recout_width = pipes[pipe_cnt].pipe.src.viewport_width; /*vp_width/hratio*/ + pipes[pipe_cnt].pipe.dest.recout_height = pipes[pipe_cnt].pipe.src.viewport_height; /*vp_height/vratio*/ + pipes[pipe_cnt].pipe.dest.full_recout_width = pipes[pipe_cnt].pipe.dest.recout_width; /*when is_hsplit != 1*/ + pipes[pipe_cnt].pipe.dest.full_recout_height = pipes[pipe_cnt].pipe.dest.recout_height; /*when is_hsplit != 1*/ + pipes[pipe_cnt].pipe.scale_ratio_depth.lb_depth = dm_lb_16; + pipes[pipe_cnt].pipe.scale_ratio_depth.hscl_ratio = 1.0; + pipes[pipe_cnt].pipe.scale_ratio_depth.vscl_ratio = 1.0; + pipes[pipe_cnt].pipe.scale_ratio_depth.scl_enable = 0; /*Lb only or Full scl*/ + pipes[pipe_cnt].pipe.scale_taps.htaps = 1; + pipes[pipe_cnt].pipe.scale_taps.vtaps = 1; + pipes[pipe_cnt].pipe.dest.vtotal_min = v_total; + pipes[pipe_cnt].pipe.dest.vtotal_max = v_total; + + if (pipes[pipe_cnt].pipe.dest.odm_combine == dm_odm_combine_mode_2to1) { + pipes[pipe_cnt].pipe.src.viewport_width /= 2; + pipes[pipe_cnt].pipe.dest.recout_width /= 2; + } else if (pipes[pipe_cnt].pipe.dest.odm_combine == dm_odm_combine_mode_4to1) { + pipes[pipe_cnt].pipe.src.viewport_width /= 4; + pipes[pipe_cnt].pipe.dest.recout_width /= 4; + } + } else { + struct dc_plane_state *pln = res_ctx->pipe_ctx[i].plane_state; + struct scaler_data *scl = &res_ctx->pipe_ctx[i].plane_res.scl_data; + + pipes[pipe_cnt].pipe.src.immediate_flip = pln->flip_immediate; + pipes[pipe_cnt].pipe.src.is_hsplit = (res_ctx->pipe_ctx[i].bottom_pipe && res_ctx->pipe_ctx[i].bottom_pipe->plane_state == pln) + || (res_ctx->pipe_ctx[i].top_pipe && res_ctx->pipe_ctx[i].top_pipe->plane_state == pln) + || pipes[pipe_cnt].pipe.dest.odm_combine != dm_odm_combine_mode_disabled; + + /* stereo is not split */ + if (pln->stereo_format == PLANE_STEREO_FORMAT_SIDE_BY_SIDE || + pln->stereo_format == PLANE_STEREO_FORMAT_TOP_AND_BOTTOM) { + pipes[pipe_cnt].pipe.src.is_hsplit = false; + pipes[pipe_cnt].pipe.src.hsplit_grp = res_ctx->pipe_ctx[i].pipe_idx; + } + + pipes[pipe_cnt].pipe.src.source_scan = pln->rotation == ROTATION_ANGLE_90 + || pln->rotation == ROTATION_ANGLE_270 ? dm_vert : dm_horz; + switch (pln->rotation) { + case ROTATION_ANGLE_0: + pipes[pipe_cnt].pipe.src.source_rotation = dm_rotation_0; + break; + case ROTATION_ANGLE_90: + pipes[pipe_cnt].pipe.src.source_rotation = dm_rotation_90; + break; + case ROTATION_ANGLE_180: + pipes[pipe_cnt].pipe.src.source_rotation = dm_rotation_180; + break; + case ROTATION_ANGLE_270: + pipes[pipe_cnt].pipe.src.source_rotation = dm_rotation_270; + break; + default: + break; + } + pipes[pipe_cnt].pipe.src.viewport_y_y = scl->viewport.y; + pipes[pipe_cnt].pipe.src.viewport_y_c = scl->viewport_c.y; + pipes[pipe_cnt].pipe.src.viewport_x_y = scl->viewport.x; + pipes[pipe_cnt].pipe.src.viewport_x_c = scl->viewport_c.x; + pipes[pipe_cnt].pipe.src.viewport_width = scl->viewport.width; + pipes[pipe_cnt].pipe.src.viewport_width_c = scl->viewport_c.width; + pipes[pipe_cnt].pipe.src.viewport_height = scl->viewport.height; + pipes[pipe_cnt].pipe.src.viewport_height_c = scl->viewport_c.height; + pipes[pipe_cnt].pipe.src.viewport_width_max = pln->src_rect.width; + pipes[pipe_cnt].pipe.src.viewport_height_max = pln->src_rect.height; + pipes[pipe_cnt].pipe.src.surface_width_y = pln->plane_size.surface_size.width; + pipes[pipe_cnt].pipe.src.surface_height_y = pln->plane_size.surface_size.height; + pipes[pipe_cnt].pipe.src.surface_width_c = pln->plane_size.chroma_size.width; + pipes[pipe_cnt].pipe.src.surface_height_c = pln->plane_size.chroma_size.height; + if (pln->format == SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA + || pln->format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) { + pipes[pipe_cnt].pipe.src.data_pitch = pln->plane_size.surface_pitch; + pipes[pipe_cnt].pipe.src.data_pitch_c = pln->plane_size.chroma_pitch; + pipes[pipe_cnt].pipe.src.meta_pitch = pln->dcc.meta_pitch; + pipes[pipe_cnt].pipe.src.meta_pitch_c = pln->dcc.meta_pitch_c; + } else { + pipes[pipe_cnt].pipe.src.data_pitch = pln->plane_size.surface_pitch; + pipes[pipe_cnt].pipe.src.meta_pitch = pln->dcc.meta_pitch; + } + pipes[pipe_cnt].pipe.src.dcc = pln->dcc.enable; + pipes[pipe_cnt].pipe.dest.recout_width = scl->recout.width; + pipes[pipe_cnt].pipe.dest.recout_height = scl->recout.height; + pipes[pipe_cnt].pipe.dest.full_recout_height = scl->recout.height; + pipes[pipe_cnt].pipe.dest.full_recout_width = scl->recout.width; + if (pipes[pipe_cnt].pipe.dest.odm_combine == dm_odm_combine_mode_2to1) + pipes[pipe_cnt].pipe.dest.full_recout_width *= 2; + else if (pipes[pipe_cnt].pipe.dest.odm_combine == dm_odm_combine_mode_4to1) + pipes[pipe_cnt].pipe.dest.full_recout_width *= 4; + else { + struct pipe_ctx *split_pipe = res_ctx->pipe_ctx[i].bottom_pipe; + + while (split_pipe && split_pipe->plane_state == pln) { + pipes[pipe_cnt].pipe.dest.full_recout_width += split_pipe->plane_res.scl_data.recout.width; + split_pipe = split_pipe->bottom_pipe; + } + split_pipe = res_ctx->pipe_ctx[i].top_pipe; + while (split_pipe && split_pipe->plane_state == pln) { + pipes[pipe_cnt].pipe.dest.full_recout_width += split_pipe->plane_res.scl_data.recout.width; + split_pipe = split_pipe->top_pipe; + } + } + + pipes[pipe_cnt].pipe.scale_ratio_depth.lb_depth = dm_lb_16; + pipes[pipe_cnt].pipe.scale_ratio_depth.hscl_ratio = (double) scl->ratios.horz.value / (1ULL<<32); + pipes[pipe_cnt].pipe.scale_ratio_depth.hscl_ratio_c = (double) scl->ratios.horz_c.value / (1ULL<<32); + pipes[pipe_cnt].pipe.scale_ratio_depth.vscl_ratio = (double) scl->ratios.vert.value / (1ULL<<32); + pipes[pipe_cnt].pipe.scale_ratio_depth.vscl_ratio_c = (double) scl->ratios.vert_c.value / (1ULL<<32); + pipes[pipe_cnt].pipe.scale_ratio_depth.scl_enable = + scl->ratios.vert.value != dc_fixpt_one.value + || scl->ratios.horz.value != dc_fixpt_one.value + || scl->ratios.vert_c.value != dc_fixpt_one.value + || scl->ratios.horz_c.value != dc_fixpt_one.value /*Lb only or Full scl*/ + || dc->debug.always_scale; /*support always scale*/ + pipes[pipe_cnt].pipe.scale_taps.htaps = scl->taps.h_taps; + pipes[pipe_cnt].pipe.scale_taps.htaps_c = scl->taps.h_taps_c; + pipes[pipe_cnt].pipe.scale_taps.vtaps = scl->taps.v_taps; + pipes[pipe_cnt].pipe.scale_taps.vtaps_c = scl->taps.v_taps_c; + + pipes[pipe_cnt].pipe.src.macro_tile_size = + swizzle_mode_to_macro_tile_size(pln->tiling_info.gfx9.swizzle); + swizzle_to_dml_params(pln->tiling_info.gfx9.swizzle, + &pipes[pipe_cnt].pipe.src.sw_mode); + + switch (pln->format) { + case SURFACE_PIXEL_FORMAT_VIDEO_420_YCbCr: + case SURFACE_PIXEL_FORMAT_VIDEO_420_YCrCb: + pipes[pipe_cnt].pipe.src.source_format = dm_420_8; + break; + case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCbCr: + case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCrCb: + pipes[pipe_cnt].pipe.src.source_format = dm_420_10; + break; + case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616: + case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616: + case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F: + case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F: + pipes[pipe_cnt].pipe.src.source_format = dm_444_64; + break; + case SURFACE_PIXEL_FORMAT_GRPH_ARGB1555: + case SURFACE_PIXEL_FORMAT_GRPH_RGB565: + pipes[pipe_cnt].pipe.src.source_format = dm_444_16; + break; + case SURFACE_PIXEL_FORMAT_GRPH_PALETA_256_COLORS: + pipes[pipe_cnt].pipe.src.source_format = dm_444_8; + break; + case SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA: + pipes[pipe_cnt].pipe.src.source_format = dm_rgbe_alpha; + break; + default: + pipes[pipe_cnt].pipe.src.source_format = dm_444_32; + break; + } + } + + pipe_cnt++; + } + + /* populate writeback information */ + dc->res_pool->funcs->populate_dml_writeback_from_context(dc, res_ctx, pipes); + + return pipe_cnt; +} + +void dcn20_calculate_wm( + struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int *out_pipe_cnt, + int *pipe_split_from, + int vlevel, + bool fast_validate) +{ + int pipe_cnt, i, pipe_idx; + + dc_assert_fp_enabled(); + + for (i = 0, pipe_idx = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { + if (!context->res_ctx.pipe_ctx[i].stream) + continue; + + pipes[pipe_cnt].clks_cfg.refclk_mhz = dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000.0; + pipes[pipe_cnt].clks_cfg.dispclk_mhz = context->bw_ctx.dml.vba.RequiredDISPCLK[vlevel][context->bw_ctx.dml.vba.maxMpcComb]; + + if (pipe_split_from[i] < 0) { + pipes[pipe_cnt].clks_cfg.dppclk_mhz = + context->bw_ctx.dml.vba.RequiredDPPCLK[vlevel][context->bw_ctx.dml.vba.maxMpcComb][pipe_idx]; + if (context->bw_ctx.dml.vba.BlendingAndTiming[pipe_idx] == pipe_idx) + pipes[pipe_cnt].pipe.dest.odm_combine = + context->bw_ctx.dml.vba.ODMCombineEnabled[pipe_idx]; + else + pipes[pipe_cnt].pipe.dest.odm_combine = 0; + pipe_idx++; + } else { + pipes[pipe_cnt].clks_cfg.dppclk_mhz = + context->bw_ctx.dml.vba.RequiredDPPCLK[vlevel][context->bw_ctx.dml.vba.maxMpcComb][pipe_split_from[i]]; + if (context->bw_ctx.dml.vba.BlendingAndTiming[pipe_split_from[i]] == pipe_split_from[i]) + pipes[pipe_cnt].pipe.dest.odm_combine = + context->bw_ctx.dml.vba.ODMCombineEnabled[pipe_split_from[i]]; + else + pipes[pipe_cnt].pipe.dest.odm_combine = 0; + } + + if (dc->config.forced_clocks) { + pipes[pipe_cnt].clks_cfg.dispclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dispclk_mhz; + pipes[pipe_cnt].clks_cfg.dppclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dppclk_mhz; + } + if (dc->debug.min_disp_clk_khz > pipes[pipe_cnt].clks_cfg.dispclk_mhz * 1000) + pipes[pipe_cnt].clks_cfg.dispclk_mhz = dc->debug.min_disp_clk_khz / 1000.0; + if (dc->debug.min_dpp_clk_khz > pipes[pipe_cnt].clks_cfg.dppclk_mhz * 1000) + pipes[pipe_cnt].clks_cfg.dppclk_mhz = dc->debug.min_dpp_clk_khz / 1000.0; + + pipe_cnt++; + } + + if (pipe_cnt != pipe_idx) { + if (dc->res_pool->funcs->populate_dml_pipes) + pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, + context, pipes, fast_validate); + else + pipe_cnt = dcn20_populate_dml_pipes_from_context(dc, + context, pipes, fast_validate); + } + + *out_pipe_cnt = pipe_cnt; + + pipes[0].clks_cfg.voltage = vlevel; + pipes[0].clks_cfg.dcfclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].dcfclk_mhz; + pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].socclk_mhz; + + /* only pipe 0 is read for voltage and dcf/soc clocks */ + if (vlevel < 1) { + pipes[0].clks_cfg.voltage = 1; + pipes[0].clks_cfg.dcfclk_mhz = context->bw_ctx.dml.soc.clock_limits[1].dcfclk_mhz; + pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[1].socclk_mhz; + } + context->bw_ctx.bw.dcn.watermarks.b.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + + if (vlevel < 2) { + pipes[0].clks_cfg.voltage = 2; + pipes[0].clks_cfg.dcfclk_mhz = context->bw_ctx.dml.soc.clock_limits[2].dcfclk_mhz; + pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[2].socclk_mhz; + } + context->bw_ctx.bw.dcn.watermarks.c.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + + if (vlevel < 3) { + pipes[0].clks_cfg.voltage = 3; + pipes[0].clks_cfg.dcfclk_mhz = context->bw_ctx.dml.soc.clock_limits[2].dcfclk_mhz; + pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[2].socclk_mhz; + } + context->bw_ctx.bw.dcn.watermarks.d.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + + pipes[0].clks_cfg.voltage = vlevel; + pipes[0].clks_cfg.dcfclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].dcfclk_mhz; + pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].socclk_mhz; + context->bw_ctx.bw.dcn.watermarks.a.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; +} + +void dcn20_update_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_st *bb, + struct pp_smu_nv_clock_table *max_clocks, unsigned int *uclk_states, unsigned int num_states) +{ + int num_calculated_states = 0; + int min_dcfclk = 0; + int i; + + dc_assert_fp_enabled(); + + if (num_states == 0) + return; + + memset(bb->clock_limits, 0, sizeof(bb->clock_limits)); + + if (dc->bb_overrides.min_dcfclk_mhz > 0) { + min_dcfclk = dc->bb_overrides.min_dcfclk_mhz; + } else { + if (ASICREV_IS_NAVI12_P(dc->ctx->asic_id.hw_internal_rev)) + min_dcfclk = 310; + else + // Accounting for SOC/DCF relationship, we can go as high as + // 506Mhz in Vmin. + min_dcfclk = 506; + } + + for (i = 0; i < num_states; i++) { + int min_fclk_required_by_uclk; + bb->clock_limits[i].state = i; + bb->clock_limits[i].dram_speed_mts = uclk_states[i] * 16 / 1000; + + // FCLK:UCLK ratio is 1.08 + min_fclk_required_by_uclk = div_u64(((unsigned long long)uclk_states[i]) * 1080, + 1000000); + + bb->clock_limits[i].fabricclk_mhz = (min_fclk_required_by_uclk < min_dcfclk) ? + min_dcfclk : min_fclk_required_by_uclk; + + bb->clock_limits[i].socclk_mhz = (bb->clock_limits[i].fabricclk_mhz > max_clocks->socClockInKhz / 1000) ? + max_clocks->socClockInKhz / 1000 : bb->clock_limits[i].fabricclk_mhz; + + bb->clock_limits[i].dcfclk_mhz = (bb->clock_limits[i].fabricclk_mhz > max_clocks->dcfClockInKhz / 1000) ? + max_clocks->dcfClockInKhz / 1000 : bb->clock_limits[i].fabricclk_mhz; + + bb->clock_limits[i].dispclk_mhz = max_clocks->displayClockInKhz / 1000; + bb->clock_limits[i].dppclk_mhz = max_clocks->displayClockInKhz / 1000; + bb->clock_limits[i].dscclk_mhz = max_clocks->displayClockInKhz / (1000 * 3); + + bb->clock_limits[i].phyclk_mhz = max_clocks->phyClockInKhz / 1000; + + num_calculated_states++; + } + + bb->clock_limits[num_calculated_states - 1].socclk_mhz = max_clocks->socClockInKhz / 1000; + bb->clock_limits[num_calculated_states - 1].fabricclk_mhz = max_clocks->socClockInKhz / 1000; + bb->clock_limits[num_calculated_states - 1].dcfclk_mhz = max_clocks->dcfClockInKhz / 1000; + + bb->num_states = num_calculated_states; + + // Duplicate the last state, DML always an extra state identical to max state to work + memcpy(&bb->clock_limits[num_calculated_states], &bb->clock_limits[num_calculated_states - 1], sizeof(struct _vcs_dpi_voltage_scaling_st)); + bb->clock_limits[num_calculated_states].state = bb->num_states; +} + +void dcn20_cap_soc_clocks( + struct _vcs_dpi_soc_bounding_box_st *bb, + struct pp_smu_nv_clock_table max_clocks) +{ + int i; + + dc_assert_fp_enabled(); + + // First pass - cap all clocks higher than the reported max + for (i = 0; i < bb->num_states; i++) { + if ((bb->clock_limits[i].dcfclk_mhz > (max_clocks.dcfClockInKhz / 1000)) + && max_clocks.dcfClockInKhz != 0) + bb->clock_limits[i].dcfclk_mhz = (max_clocks.dcfClockInKhz / 1000); + + if ((bb->clock_limits[i].dram_speed_mts > (max_clocks.uClockInKhz / 1000) * 16) + && max_clocks.uClockInKhz != 0) + bb->clock_limits[i].dram_speed_mts = (max_clocks.uClockInKhz / 1000) * 16; + + if ((bb->clock_limits[i].fabricclk_mhz > (max_clocks.fabricClockInKhz / 1000)) + && max_clocks.fabricClockInKhz != 0) + bb->clock_limits[i].fabricclk_mhz = (max_clocks.fabricClockInKhz / 1000); + + if ((bb->clock_limits[i].dispclk_mhz > (max_clocks.displayClockInKhz / 1000)) + && max_clocks.displayClockInKhz != 0) + bb->clock_limits[i].dispclk_mhz = (max_clocks.displayClockInKhz / 1000); + + if ((bb->clock_limits[i].dppclk_mhz > (max_clocks.dppClockInKhz / 1000)) + && max_clocks.dppClockInKhz != 0) + bb->clock_limits[i].dppclk_mhz = (max_clocks.dppClockInKhz / 1000); + + if ((bb->clock_limits[i].phyclk_mhz > (max_clocks.phyClockInKhz / 1000)) + && max_clocks.phyClockInKhz != 0) + bb->clock_limits[i].phyclk_mhz = (max_clocks.phyClockInKhz / 1000); + + if ((bb->clock_limits[i].socclk_mhz > (max_clocks.socClockInKhz / 1000)) + && max_clocks.socClockInKhz != 0) + bb->clock_limits[i].socclk_mhz = (max_clocks.socClockInKhz / 1000); + + if ((bb->clock_limits[i].dscclk_mhz > (max_clocks.dscClockInKhz / 1000)) + && max_clocks.dscClockInKhz != 0) + bb->clock_limits[i].dscclk_mhz = (max_clocks.dscClockInKhz / 1000); + } + + // Second pass - remove all duplicate clock states + for (i = bb->num_states - 1; i > 1; i--) { + bool duplicate = true; + + if (bb->clock_limits[i-1].dcfclk_mhz != bb->clock_limits[i].dcfclk_mhz) + duplicate = false; + if (bb->clock_limits[i-1].dispclk_mhz != bb->clock_limits[i].dispclk_mhz) + duplicate = false; + if (bb->clock_limits[i-1].dppclk_mhz != bb->clock_limits[i].dppclk_mhz) + duplicate = false; + if (bb->clock_limits[i-1].dram_speed_mts != bb->clock_limits[i].dram_speed_mts) + duplicate = false; + if (bb->clock_limits[i-1].dscclk_mhz != bb->clock_limits[i].dscclk_mhz) + duplicate = false; + if (bb->clock_limits[i-1].fabricclk_mhz != bb->clock_limits[i].fabricclk_mhz) + duplicate = false; + if (bb->clock_limits[i-1].phyclk_mhz != bb->clock_limits[i].phyclk_mhz) + duplicate = false; + if (bb->clock_limits[i-1].socclk_mhz != bb->clock_limits[i].socclk_mhz) + duplicate = false; + + if (duplicate) + bb->num_states--; + } +} + +void dcn20_patch_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_st *bb) +{ + dc_assert_fp_enabled(); + + if ((int)(bb->sr_exit_time_us * 1000) != dc->bb_overrides.sr_exit_time_ns + && dc->bb_overrides.sr_exit_time_ns) { + bb->sr_exit_time_us = dc->bb_overrides.sr_exit_time_ns / 1000.0; + } + + if ((int)(bb->sr_enter_plus_exit_time_us * 1000) + != dc->bb_overrides.sr_enter_plus_exit_time_ns + && dc->bb_overrides.sr_enter_plus_exit_time_ns) { + bb->sr_enter_plus_exit_time_us = + dc->bb_overrides.sr_enter_plus_exit_time_ns / 1000.0; + } + + if ((int)(bb->urgent_latency_us * 1000) != dc->bb_overrides.urgent_latency_ns + && dc->bb_overrides.urgent_latency_ns) { + bb->urgent_latency_us = dc->bb_overrides.urgent_latency_ns / 1000.0; + } + + if ((int)(bb->dram_clock_change_latency_us * 1000) + != dc->bb_overrides.dram_clock_change_latency_ns + && dc->bb_overrides.dram_clock_change_latency_ns) { + bb->dram_clock_change_latency_us = + dc->bb_overrides.dram_clock_change_latency_ns / 1000.0; + } + + if ((int)(bb->dummy_pstate_latency_us * 1000) + != dc->bb_overrides.dummy_clock_change_latency_ns + && dc->bb_overrides.dummy_clock_change_latency_ns) { + bb->dummy_pstate_latency_us = + dc->bb_overrides.dummy_clock_change_latency_ns / 1000.0; + } +} + +static bool dcn20_validate_bandwidth_internal(struct dc *dc, struct dc_state *context, + bool fast_validate) +{ + bool out = false; + + BW_VAL_TRACE_SETUP(); + + int vlevel = 0; + int pipe_split_from[MAX_PIPES]; + int pipe_cnt = 0; + display_e2e_pipe_params_st *pipes = kzalloc(dc->res_pool->pipe_count * sizeof(display_e2e_pipe_params_st), GFP_ATOMIC); + DC_LOGGER_INIT(dc->ctx->logger); + + BW_VAL_TRACE_COUNT(); + + out = dcn20_fast_validate_bw(dc, context, pipes, &pipe_cnt, pipe_split_from, &vlevel, fast_validate); + + if (pipe_cnt == 0) + goto validate_out; + + if (!out) + goto validate_fail; + + BW_VAL_TRACE_END_VOLTAGE_LEVEL(); + + if (fast_validate) { + BW_VAL_TRACE_SKIP(fast); + goto validate_out; + } + + dcn20_calculate_wm(dc, context, pipes, &pipe_cnt, pipe_split_from, vlevel, fast_validate); + dcn20_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel); + + BW_VAL_TRACE_END_WATERMARKS(); + + goto validate_out; + +validate_fail: + DC_LOG_WARNING("Mode Validation Warning: %s failed validation.\n", + dml_get_status_message(context->bw_ctx.dml.vba.ValidationStatus[context->bw_ctx.dml.vba.soc.num_states])); + + BW_VAL_TRACE_SKIP(fail); + out = false; + +validate_out: + kfree(pipes); + + BW_VAL_TRACE_FINISH(); + + return out; +} + +bool dcn20_validate_bandwidth_fp(struct dc *dc, + struct dc_state *context, + bool fast_validate) +{ + bool voltage_supported = false; + bool full_pstate_supported = false; + bool dummy_pstate_supported = false; + double p_state_latency_us; + + dc_assert_fp_enabled(); + + p_state_latency_us = context->bw_ctx.dml.soc.dram_clock_change_latency_us; + context->bw_ctx.dml.soc.disable_dram_clock_change_vactive_support = + dc->debug.disable_dram_clock_change_vactive_support; + context->bw_ctx.dml.soc.allow_dram_clock_one_display_vactive = + dc->debug.enable_dram_clock_change_one_display_vactive; + + /*Unsafe due to current pipe merge and split logic*/ + ASSERT(context != dc->current_state); + + if (fast_validate) { + return dcn20_validate_bandwidth_internal(dc, context, true); + } + + // Best case, we support full UCLK switch latency + voltage_supported = dcn20_validate_bandwidth_internal(dc, context, false); + full_pstate_supported = context->bw_ctx.bw.dcn.clk.p_state_change_support; + + if (context->bw_ctx.dml.soc.dummy_pstate_latency_us == 0 || + (voltage_supported && full_pstate_supported)) { + context->bw_ctx.bw.dcn.clk.p_state_change_support = full_pstate_supported; + goto restore_dml_state; + } + + // Fallback: Try to only support G6 temperature read latency + context->bw_ctx.dml.soc.dram_clock_change_latency_us = context->bw_ctx.dml.soc.dummy_pstate_latency_us; + + voltage_supported = dcn20_validate_bandwidth_internal(dc, context, false); + dummy_pstate_supported = context->bw_ctx.bw.dcn.clk.p_state_change_support; + + if (voltage_supported && (dummy_pstate_supported || !(context->stream_count))) { + context->bw_ctx.bw.dcn.clk.p_state_change_support = false; + goto restore_dml_state; + } + + // ERROR: fallback is supposed to always work. + ASSERT(false); + +restore_dml_state: + context->bw_ctx.dml.soc.dram_clock_change_latency_us = p_state_latency_us; + return voltage_supported; +} + +void dcn20_fpu_set_wm_ranges(int i, + struct pp_smu_wm_range_sets *ranges, + struct _vcs_dpi_soc_bounding_box_st *loaded_bb) +{ + dc_assert_fp_enabled(); + + ranges->reader_wm_sets[i].min_fill_clk_mhz = (i > 0) ? (loaded_bb->clock_limits[i - 1].dram_speed_mts / 16) + 1 : 0; + ranges->reader_wm_sets[i].max_fill_clk_mhz = loaded_bb->clock_limits[i].dram_speed_mts / 16; +} + +void dcn20_fpu_adjust_dppclk(struct vba_vars_st *v, + int vlevel, + int max_mpc_comb, + int pipe_idx, + bool is_validating_bw) +{ + dc_assert_fp_enabled(); + + if (is_validating_bw) + v->RequiredDPPCLK[vlevel][max_mpc_comb][pipe_idx] *= 2; + else + v->RequiredDPPCLK[vlevel][max_mpc_comb][pipe_idx] /= 2; +} + +int dcn21_populate_dml_pipes_from_context(struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + bool fast_validate) +{ + uint32_t pipe_cnt; + int i; + + dc_assert_fp_enabled(); + + pipe_cnt = dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate); + + for (i = 0; i < pipe_cnt; i++) { + + pipes[i].pipe.src.hostvm = dc->res_pool->hubbub->riommu_active; + pipes[i].pipe.src.gpuvm = 1; + } + + return pipe_cnt; +} + +static void patch_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_st *bb) +{ + int i; + + if (dc->bb_overrides.sr_exit_time_ns) { + for (i = 0; i < WM_SET_COUNT; i++) { + dc->clk_mgr->bw_params->wm_table.entries[i].sr_exit_time_us = + dc->bb_overrides.sr_exit_time_ns / 1000.0; + } + } + + if (dc->bb_overrides.sr_enter_plus_exit_time_ns) { + for (i = 0; i < WM_SET_COUNT; i++) { + dc->clk_mgr->bw_params->wm_table.entries[i].sr_enter_plus_exit_time_us = + dc->bb_overrides.sr_enter_plus_exit_time_ns / 1000.0; + } + } + + if (dc->bb_overrides.urgent_latency_ns) { + bb->urgent_latency_us = dc->bb_overrides.urgent_latency_ns / 1000.0; + } + + if (dc->bb_overrides.dram_clock_change_latency_ns) { + for (i = 0; i < WM_SET_COUNT; i++) { + dc->clk_mgr->bw_params->wm_table.entries[i].pstate_latency_us = + dc->bb_overrides.dram_clock_change_latency_ns / 1000.0; + } + } +} + +static void calculate_wm_set_for_vlevel(int vlevel, + struct wm_range_table_entry *table_entry, + struct dcn_watermarks *wm_set, + struct display_mode_lib *dml, + display_e2e_pipe_params_st *pipes, + int pipe_cnt) +{ + double dram_clock_change_latency_cached = dml->soc.dram_clock_change_latency_us; + + ASSERT(vlevel < dml->soc.num_states); + /* only pipe 0 is read for voltage and dcf/soc clocks */ + pipes[0].clks_cfg.voltage = vlevel; + pipes[0].clks_cfg.dcfclk_mhz = dml->soc.clock_limits[vlevel].dcfclk_mhz; + pipes[0].clks_cfg.socclk_mhz = dml->soc.clock_limits[vlevel].socclk_mhz; + + dml->soc.dram_clock_change_latency_us = table_entry->pstate_latency_us; + dml->soc.sr_exit_time_us = table_entry->sr_exit_time_us; + dml->soc.sr_enter_plus_exit_time_us = table_entry->sr_enter_plus_exit_time_us; + + wm_set->urgent_ns = get_wm_urgent(dml, pipes, pipe_cnt) * 1000; + wm_set->cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(dml, pipes, pipe_cnt) * 1000; + wm_set->cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(dml, pipes, pipe_cnt) * 1000; + wm_set->cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(dml, pipes, pipe_cnt) * 1000; + wm_set->pte_meta_urgent_ns = get_wm_memory_trip(dml, pipes, pipe_cnt) * 1000; + wm_set->frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(dml, pipes, pipe_cnt) * 1000; + wm_set->frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(dml, pipes, pipe_cnt) * 1000; + wm_set->urgent_latency_ns = get_urgent_latency(dml, pipes, pipe_cnt) * 1000; + dml->soc.dram_clock_change_latency_us = dram_clock_change_latency_cached; +} + +static void dcn21_calculate_wm(struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int *out_pipe_cnt, + int *pipe_split_from, + int vlevel_req, + bool fast_validate) +{ + int pipe_cnt, i, pipe_idx; + int vlevel, vlevel_max; + struct wm_range_table_entry *table_entry; + struct clk_bw_params *bw_params = dc->clk_mgr->bw_params; + + ASSERT(bw_params); + + patch_bounding_box(dc, &context->bw_ctx.dml.soc); + + for (i = 0, pipe_idx = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { + if (!context->res_ctx.pipe_ctx[i].stream) + continue; + + pipes[pipe_cnt].clks_cfg.refclk_mhz = dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000.0; + pipes[pipe_cnt].clks_cfg.dispclk_mhz = context->bw_ctx.dml.vba.RequiredDISPCLK[vlevel_req][context->bw_ctx.dml.vba.maxMpcComb]; + + if (pipe_split_from[i] < 0) { + pipes[pipe_cnt].clks_cfg.dppclk_mhz = + context->bw_ctx.dml.vba.RequiredDPPCLK[vlevel_req][context->bw_ctx.dml.vba.maxMpcComb][pipe_idx]; + if (context->bw_ctx.dml.vba.BlendingAndTiming[pipe_idx] == pipe_idx) + pipes[pipe_cnt].pipe.dest.odm_combine = + context->bw_ctx.dml.vba.ODMCombineEnablePerState[vlevel_req][pipe_idx]; + else + pipes[pipe_cnt].pipe.dest.odm_combine = 0; + pipe_idx++; + } else { + pipes[pipe_cnt].clks_cfg.dppclk_mhz = + context->bw_ctx.dml.vba.RequiredDPPCLK[vlevel_req][context->bw_ctx.dml.vba.maxMpcComb][pipe_split_from[i]]; + if (context->bw_ctx.dml.vba.BlendingAndTiming[pipe_split_from[i]] == pipe_split_from[i]) + pipes[pipe_cnt].pipe.dest.odm_combine = + context->bw_ctx.dml.vba.ODMCombineEnablePerState[vlevel_req][pipe_split_from[i]]; + else + pipes[pipe_cnt].pipe.dest.odm_combine = 0; + } + pipe_cnt++; + } + + if (pipe_cnt != pipe_idx) { + if (dc->res_pool->funcs->populate_dml_pipes) + pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, + context, pipes, fast_validate); + else + pipe_cnt = dcn21_populate_dml_pipes_from_context(dc, + context, pipes, fast_validate); + } + + *out_pipe_cnt = pipe_cnt; + + vlevel_max = bw_params->clk_table.num_entries - 1; + + + /* WM Set D */ + table_entry = &bw_params->wm_table.entries[WM_D]; + if (table_entry->wm_type == WM_TYPE_RETRAINING) + vlevel = 0; + else + vlevel = vlevel_max; + calculate_wm_set_for_vlevel(vlevel, table_entry, &context->bw_ctx.bw.dcn.watermarks.d, + &context->bw_ctx.dml, pipes, pipe_cnt); + /* WM Set C */ + table_entry = &bw_params->wm_table.entries[WM_C]; + vlevel = MIN(MAX(vlevel_req, 3), vlevel_max); + calculate_wm_set_for_vlevel(vlevel, table_entry, &context->bw_ctx.bw.dcn.watermarks.c, + &context->bw_ctx.dml, pipes, pipe_cnt); + /* WM Set B */ + table_entry = &bw_params->wm_table.entries[WM_B]; + vlevel = MIN(MAX(vlevel_req, 2), vlevel_max); + calculate_wm_set_for_vlevel(vlevel, table_entry, &context->bw_ctx.bw.dcn.watermarks.b, + &context->bw_ctx.dml, pipes, pipe_cnt); + + /* WM Set A */ + table_entry = &bw_params->wm_table.entries[WM_A]; + vlevel = MIN(vlevel_req, vlevel_max); + calculate_wm_set_for_vlevel(vlevel, table_entry, &context->bw_ctx.bw.dcn.watermarks.a, + &context->bw_ctx.dml, pipes, pipe_cnt); +} + +bool dcn21_validate_bandwidth_fp(struct dc *dc, + struct dc_state *context, + bool fast_validate) +{ + bool out = false; + + BW_VAL_TRACE_SETUP(); + + int vlevel = 0; + int pipe_split_from[MAX_PIPES]; + int pipe_cnt = 0; + display_e2e_pipe_params_st *pipes = kzalloc(dc->res_pool->pipe_count * sizeof(display_e2e_pipe_params_st), GFP_ATOMIC); + DC_LOGGER_INIT(dc->ctx->logger); + + BW_VAL_TRACE_COUNT(); + + dc_assert_fp_enabled(); + + /*Unsafe due to current pipe merge and split logic*/ + ASSERT(context != dc->current_state); + + out = dcn21_fast_validate_bw(dc, context, pipes, &pipe_cnt, pipe_split_from, &vlevel, fast_validate); + + if (pipe_cnt == 0) + goto validate_out; + + if (!out) + goto validate_fail; + + BW_VAL_TRACE_END_VOLTAGE_LEVEL(); + + if (fast_validate) { + BW_VAL_TRACE_SKIP(fast); + goto validate_out; + } + + dcn21_calculate_wm(dc, context, pipes, &pipe_cnt, pipe_split_from, vlevel, fast_validate); + dcn20_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel); + + BW_VAL_TRACE_END_WATERMARKS(); + + goto validate_out; + +validate_fail: + DC_LOG_WARNING("Mode Validation Warning: %s failed validation.\n", + dml_get_status_message(context->bw_ctx.dml.vba.ValidationStatus[context->bw_ctx.dml.vba.soc.num_states])); + + BW_VAL_TRACE_SKIP(fail); + out = false; + +validate_out: + kfree(pipes); + + BW_VAL_TRACE_FINISH(); + + return out; +} + +static struct _vcs_dpi_voltage_scaling_st construct_low_pstate_lvl(struct clk_limit_table *clk_table, unsigned int high_voltage_lvl) +{ + struct _vcs_dpi_voltage_scaling_st low_pstate_lvl; + int i; + + low_pstate_lvl.state = 1; + low_pstate_lvl.dcfclk_mhz = clk_table->entries[0].dcfclk_mhz; + low_pstate_lvl.fabricclk_mhz = clk_table->entries[0].fclk_mhz; + low_pstate_lvl.socclk_mhz = clk_table->entries[0].socclk_mhz; + low_pstate_lvl.dram_speed_mts = clk_table->entries[0].memclk_mhz * 2; + + low_pstate_lvl.dispclk_mhz = dcn2_1_soc.clock_limits[high_voltage_lvl].dispclk_mhz; + low_pstate_lvl.dppclk_mhz = dcn2_1_soc.clock_limits[high_voltage_lvl].dppclk_mhz; + low_pstate_lvl.dram_bw_per_chan_gbps = dcn2_1_soc.clock_limits[high_voltage_lvl].dram_bw_per_chan_gbps; + low_pstate_lvl.dscclk_mhz = dcn2_1_soc.clock_limits[high_voltage_lvl].dscclk_mhz; + low_pstate_lvl.dtbclk_mhz = dcn2_1_soc.clock_limits[high_voltage_lvl].dtbclk_mhz; + low_pstate_lvl.phyclk_d18_mhz = dcn2_1_soc.clock_limits[high_voltage_lvl].phyclk_d18_mhz; + low_pstate_lvl.phyclk_mhz = dcn2_1_soc.clock_limits[high_voltage_lvl].phyclk_mhz; + + for (i = clk_table->num_entries; i > 1; i--) + clk_table->entries[i] = clk_table->entries[i-1]; + clk_table->entries[1] = clk_table->entries[0]; + clk_table->num_entries++; + + return low_pstate_lvl; +} + +void dcn21_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) +{ + struct _vcs_dpi_voltage_scaling_st *s = dc->scratch.update_bw_bounding_box.clock_limits; + struct dcn21_resource_pool *pool = TO_DCN21_RES_POOL(dc->res_pool); + struct clk_limit_table *clk_table = &bw_params->clk_table; + unsigned int i, closest_clk_lvl = 0, k = 0; + int j; + + dc_assert_fp_enabled(); + + dcn2_1_ip.max_num_otg = pool->base.res_cap->num_timing_generator; + dcn2_1_ip.max_num_dpp = pool->base.pipe_count; + dcn2_1_soc.num_chans = bw_params->num_channels; + + ASSERT(clk_table->num_entries); + /* Copy dcn2_1_soc.clock_limits to clock_limits to avoid copying over null states later */ + memcpy(s, dcn2_1_soc.clock_limits, sizeof(dcn2_1_soc.clock_limits)); + + for (i = 0; i < clk_table->num_entries; i++) { + /* loop backwards*/ + for (closest_clk_lvl = 0, j = dcn2_1_soc.num_states - 1; j >= 0; j--) { + if ((unsigned int) dcn2_1_soc.clock_limits[j].dcfclk_mhz <= clk_table->entries[i].dcfclk_mhz) { + closest_clk_lvl = j; + break; + } + } + + /* clk_table[1] is reserved for min DF PState. skip here to fill in later. */ + if (i == 1) + k++; + + s[k].state = k; + s[k].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; + s[k].fabricclk_mhz = clk_table->entries[i].fclk_mhz; + s[k].socclk_mhz = clk_table->entries[i].socclk_mhz; + s[k].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2; + + s[k].dispclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dispclk_mhz; + s[k].dppclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dppclk_mhz; + s[k].dram_bw_per_chan_gbps = + dcn2_1_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps; + s[k].dscclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dscclk_mhz; + s[k].dtbclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dtbclk_mhz; + s[k].phyclk_d18_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz; + s[k].phyclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].phyclk_mhz; + + k++; + } + + memcpy(dcn2_1_soc.clock_limits, s, sizeof(dcn2_1_soc.clock_limits)); + + if (clk_table->num_entries) { + dcn2_1_soc.num_states = clk_table->num_entries + 1; + /* fill in min DF PState */ + dcn2_1_soc.clock_limits[1] = construct_low_pstate_lvl(clk_table, closest_clk_lvl); + /* duplicate last level */ + dcn2_1_soc.clock_limits[dcn2_1_soc.num_states] = dcn2_1_soc.clock_limits[dcn2_1_soc.num_states - 1]; + dcn2_1_soc.clock_limits[dcn2_1_soc.num_states].state = dcn2_1_soc.num_states; + } + + dml_init_instance(&dc->dml, &dcn2_1_soc, &dcn2_1_ip, DML_PROJECT_DCN21); +} + +void dcn21_clk_mgr_set_bw_params_wm_table(struct clk_bw_params *bw_params) +{ + dc_assert_fp_enabled(); + + bw_params->wm_table.entries[WM_D].pstate_latency_us = LPDDR_MEM_RETRAIN_LATENCY; + bw_params->wm_table.entries[WM_D].wm_inst = WM_D; + bw_params->wm_table.entries[WM_D].wm_type = WM_TYPE_RETRAINING; + bw_params->wm_table.entries[WM_D].valid = true; +} + +void dcn201_populate_dml_writeback_from_context_fpu(struct dc *dc, + struct resource_context *res_ctx, + display_e2e_pipe_params_st *pipes) +{ + int pipe_cnt, i, j; + double max_calc_writeback_dispclk; + double writeback_dispclk; + struct writeback_st dout_wb; + + dc_assert_fp_enabled(); + + for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { + struct dc_stream_state *stream = res_ctx->pipe_ctx[i].stream; + + if (!stream) + continue; + max_calc_writeback_dispclk = 0; + + /* Set writeback information */ + pipes[pipe_cnt].dout.wb_enable = 0; + pipes[pipe_cnt].dout.num_active_wb = 0; + for (j = 0; j < stream->num_wb_info; j++) { + struct dc_writeback_info *wb_info = &stream->writeback_info[j]; + + if (wb_info->wb_enabled && wb_info->writeback_source_plane && + (wb_info->writeback_source_plane == res_ctx->pipe_ctx[i].plane_state)) { + pipes[pipe_cnt].dout.wb_enable = 1; + pipes[pipe_cnt].dout.num_active_wb++; + dout_wb.wb_src_height = wb_info->dwb_params.cnv_params.crop_en ? + wb_info->dwb_params.cnv_params.crop_height : + wb_info->dwb_params.cnv_params.src_height; + dout_wb.wb_src_width = wb_info->dwb_params.cnv_params.crop_en ? + wb_info->dwb_params.cnv_params.crop_width : + wb_info->dwb_params.cnv_params.src_width; + dout_wb.wb_dst_width = wb_info->dwb_params.dest_width; + dout_wb.wb_dst_height = wb_info->dwb_params.dest_height; + dout_wb.wb_htaps_luma = wb_info->dwb_params.scaler_taps.h_taps; + dout_wb.wb_vtaps_luma = wb_info->dwb_params.scaler_taps.v_taps; + dout_wb.wb_htaps_chroma = wb_info->dwb_params.scaler_taps.h_taps_c; + dout_wb.wb_vtaps_chroma = wb_info->dwb_params.scaler_taps.v_taps_c; + dout_wb.wb_hratio = wb_info->dwb_params.cnv_params.crop_en ? + (double)wb_info->dwb_params.cnv_params.crop_width / + (double)wb_info->dwb_params.dest_width : + (double)wb_info->dwb_params.cnv_params.src_width / + (double)wb_info->dwb_params.dest_width; + dout_wb.wb_vratio = wb_info->dwb_params.cnv_params.crop_en ? + (double)wb_info->dwb_params.cnv_params.crop_height / + (double)wb_info->dwb_params.dest_height : + (double)wb_info->dwb_params.cnv_params.src_height / + (double)wb_info->dwb_params.dest_height; + if (wb_info->dwb_params.out_format == dwb_scaler_mode_yuv420) { + if (wb_info->dwb_params.output_depth == DWB_OUTPUT_PIXEL_DEPTH_8BPC) + dout_wb.wb_pixel_format = dm_420_8; + else + dout_wb.wb_pixel_format = dm_420_10; + } else + dout_wb.wb_pixel_format = dm_444_32; + + /* Workaround for cases where multiple writebacks are connected to same plane + * In which case, need to compute worst case and set the associated writeback parameters + * This workaround is necessary due to DML computation assuming only 1 set of writeback + * parameters per pipe */ + writeback_dispclk = CalculateWriteBackDISPCLK( + dout_wb.wb_pixel_format, + pipes[pipe_cnt].pipe.dest.pixel_rate_mhz, + dout_wb.wb_hratio, + dout_wb.wb_vratio, + dout_wb.wb_htaps_luma, + dout_wb.wb_vtaps_luma, + dout_wb.wb_htaps_chroma, + dout_wb.wb_vtaps_chroma, + dout_wb.wb_dst_width, + pipes[pipe_cnt].pipe.dest.htotal, + 2); + + if (writeback_dispclk > max_calc_writeback_dispclk) { + max_calc_writeback_dispclk = writeback_dispclk; + pipes[pipe_cnt].dout.wb = dout_wb; + } + } + } + + pipe_cnt++; + } + +} diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h new file mode 100644 index 000000000..c51badf7b --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h @@ -0,0 +1,91 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ +#include "core_types.h" + +#ifndef __DCN20_FPU_H__ +#define __DCN20_FPU_H__ + +void dcn20_populate_dml_writeback_from_context(struct dc *dc, + struct resource_context *res_ctx, + display_e2e_pipe_params_st *pipes); + +void dcn20_fpu_set_wb_arb_params(struct mcif_arb_params *wb_arb_params, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt, int i); +void dcn20_calculate_dlg_params(struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt, + int vlevel); +int dcn20_populate_dml_pipes_from_context(struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + bool fast_validate); +void dcn20_calculate_wm(struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int *out_pipe_cnt, + int *pipe_split_from, + int vlevel, + bool fast_validate); +void dcn20_cap_soc_clocks(struct _vcs_dpi_soc_bounding_box_st *bb, + struct pp_smu_nv_clock_table max_clocks); +void dcn20_update_bounding_box(struct dc *dc, + struct _vcs_dpi_soc_bounding_box_st *bb, + struct pp_smu_nv_clock_table *max_clocks, + unsigned int *uclk_states, + unsigned int num_states); +void dcn20_patch_bounding_box(struct dc *dc, + struct _vcs_dpi_soc_bounding_box_st *bb); +bool dcn20_validate_bandwidth_fp(struct dc *dc, + struct dc_state *context, + bool fast_validate); +void dcn20_fpu_set_wm_ranges(int i, + struct pp_smu_wm_range_sets *ranges, + struct _vcs_dpi_soc_bounding_box_st *loaded_bb); +void dcn20_fpu_adjust_dppclk(struct vba_vars_st *v, + int vlevel, + int max_mpc_comb, + int pipe_idx, + bool is_validating_bw); + +int dcn21_populate_dml_pipes_from_context(struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + bool fast_validate); +bool dcn21_validate_bandwidth_fp(struct dc *dc, + struct dc_state *context, + bool fast_validate); +void dcn21_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params); + +void dcn21_clk_mgr_set_bw_params_wm_table(struct clk_bw_params *bw_params); + +void dcn201_populate_dml_writeback_from_context_fpu(struct dc *dc, + struct resource_context *res_ctx, + display_e2e_pipe_params_st *pipes); + +#endif /* __DCN20_FPU_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c new file mode 100644 index 000000000..6266b0788 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c @@ -0,0 +1,5114 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#include "../display_mode_lib.h" +#include "display_mode_vba_20.h" +#include "../dml_inline_defs.h" + +/* + * NOTE: + * This file is gcc-parseable HW gospel, coming straight from HW engineers. + * + * It doesn't adhere to Linux kernel style and sometimes will do things in odd + * ways. Unless there is something clearly wrong with it the code should + * remain as-is as it provides us with a guarantee from HW that it is correct. + */ + +#define BPP_INVALID 0 +#define BPP_BLENDED_PIPE 0xffffffff +#define DCN20_MAX_420_IMAGE_WIDTH 4096 + +static double adjust_ReturnBW( + struct display_mode_lib *mode_lib, + double ReturnBW, + bool DCCEnabledAnyPlane, + double ReturnBandwidthToDCN); +static unsigned int dscceComputeDelay( + unsigned int bpc, + double bpp, + unsigned int sliceWidth, + unsigned int numSlices, + enum output_format_class pixelFormat); +static unsigned int dscComputeDelay(enum output_format_class pixelFormat); +// Super monster function with some 45 argument +static bool CalculatePrefetchSchedule( + struct display_mode_lib *mode_lib, + double DPPCLK, + double DISPCLK, + double PixelClock, + double DCFCLKDeepSleep, + unsigned int DSCDelay, + unsigned int DPPPerPlane, + bool ScalerEnabled, + unsigned int NumberOfCursors, + double DPPCLKDelaySubtotal, + double DPPCLKDelaySCL, + double DPPCLKDelaySCLLBOnly, + double DPPCLKDelayCNVCFormater, + double DPPCLKDelayCNVCCursor, + double DISPCLKDelaySubtotal, + unsigned int ScalerRecoutWidth, + enum output_format_class OutputFormat, + unsigned int VBlank, + unsigned int HTotal, + unsigned int MaxInterDCNTileRepeaters, + unsigned int VStartup, + unsigned int PageTableLevels, + bool GPUVMEnable, + bool DynamicMetadataEnable, + unsigned int DynamicMetadataLinesBeforeActiveRequired, + unsigned int DynamicMetadataTransmittedBytes, + bool DCCEnable, + double UrgentLatencyPixelDataOnly, + double UrgentExtraLatency, + double TCalc, + unsigned int PDEAndMetaPTEBytesFrame, + unsigned int MetaRowByte, + unsigned int PixelPTEBytesPerRow, + double PrefetchSourceLinesY, + unsigned int SwathWidthY, + double BytePerPixelDETY, + double VInitPreFillY, + unsigned int MaxNumSwathY, + double PrefetchSourceLinesC, + double BytePerPixelDETC, + double VInitPreFillC, + unsigned int MaxNumSwathC, + unsigned int SwathHeightY, + unsigned int SwathHeightC, + double TWait, + bool XFCEnabled, + double XFCRemoteSurfaceFlipDelay, + bool InterlaceEnable, + bool ProgressiveToInterlaceUnitInOPP, + double *DSTXAfterScaler, + double *DSTYAfterScaler, + double *DestinationLinesForPrefetch, + double *PrefetchBandwidth, + double *DestinationLinesToRequestVMInVBlank, + double *DestinationLinesToRequestRowInVBlank, + double *VRatioPrefetchY, + double *VRatioPrefetchC, + double *RequiredPrefetchPixDataBW, + unsigned int *VStartupRequiredWhenNotEnoughTimeForDynamicMetadata, + double *Tno_bw, + unsigned int *VUpdateOffsetPix, + double *VUpdateWidthPix, + double *VReadyOffsetPix); +static double RoundToDFSGranularityUp(double Clock, double VCOSpeed); +static double RoundToDFSGranularityDown(double Clock, double VCOSpeed); +static double CalculatePrefetchSourceLines( + struct display_mode_lib *mode_lib, + double VRatio, + double vtaps, + bool Interlace, + bool ProgressiveToInterlaceUnitInOPP, + unsigned int SwathHeight, + unsigned int ViewportYStart, + double *VInitPreFill, + unsigned int *MaxNumSwath); +static unsigned int CalculateVMAndRowBytes( + struct display_mode_lib *mode_lib, + bool DCCEnable, + unsigned int BlockHeight256Bytes, + unsigned int BlockWidth256Bytes, + enum source_format_class SourcePixelFormat, + unsigned int SurfaceTiling, + unsigned int BytePerPixel, + enum scan_direction_class ScanDirection, + unsigned int ViewportWidth, + unsigned int ViewportHeight, + unsigned int SwathWidthY, + bool GPUVMEnable, + unsigned int VMMPageSize, + unsigned int PTEBufferSizeInRequestsLuma, + unsigned int PDEProcessingBufIn64KBReqs, + unsigned int Pitch, + unsigned int DCCMetaPitch, + unsigned int *MacroTileWidth, + unsigned int *MetaRowByte, + unsigned int *PixelPTEBytesPerRow, + bool *PTEBufferSizeNotExceeded, + unsigned int *dpte_row_height, + unsigned int *meta_row_height); +static double CalculateTWait( + unsigned int PrefetchMode, + double DRAMClockChangeLatency, + double UrgentLatencyPixelDataOnly, + double SREnterPlusExitTime); +static double CalculateRemoteSurfaceFlipDelay( + struct display_mode_lib *mode_lib, + double VRatio, + double SwathWidth, + double Bpp, + double LineTime, + double XFCTSlvVupdateOffset, + double XFCTSlvVupdateWidth, + double XFCTSlvVreadyOffset, + double XFCXBUFLatencyTolerance, + double XFCFillBWOverhead, + double XFCSlvChunkSize, + double XFCBusTransportTime, + double TCalc, + double TWait, + double *SrcActiveDrainRate, + double *TInitXFill, + double *TslvChk); +static void CalculateActiveRowBandwidth( + bool GPUVMEnable, + enum source_format_class SourcePixelFormat, + double VRatio, + bool DCCEnable, + double LineTime, + unsigned int MetaRowByteLuma, + unsigned int MetaRowByteChroma, + unsigned int meta_row_height_luma, + unsigned int meta_row_height_chroma, + unsigned int PixelPTEBytesPerRowLuma, + unsigned int PixelPTEBytesPerRowChroma, + unsigned int dpte_row_height_luma, + unsigned int dpte_row_height_chroma, + double *meta_row_bw, + double *dpte_row_bw, + double *qual_row_bw); +static void CalculateFlipSchedule( + struct display_mode_lib *mode_lib, + double UrgentExtraLatency, + double UrgentLatencyPixelDataOnly, + unsigned int GPUVMMaxPageTableLevels, + bool GPUVMEnable, + double BandwidthAvailableForImmediateFlip, + unsigned int TotImmediateFlipBytes, + enum source_format_class SourcePixelFormat, + unsigned int ImmediateFlipBytes, + double LineTime, + double VRatio, + double Tno_bw, + double PDEAndMetaPTEBytesFrame, + unsigned int MetaRowByte, + unsigned int PixelPTEBytesPerRow, + bool DCCEnable, + unsigned int dpte_row_height, + unsigned int meta_row_height, + double qual_row_bw, + double *DestinationLinesToRequestVMInImmediateFlip, + double *DestinationLinesToRequestRowInImmediateFlip, + double *final_flip_bw, + bool *ImmediateFlipSupportedForPipe); +static double CalculateWriteBackDelay( + enum source_format_class WritebackPixelFormat, + double WritebackHRatio, + double WritebackVRatio, + unsigned int WritebackLumaHTaps, + unsigned int WritebackLumaVTaps, + unsigned int WritebackChromaHTaps, + unsigned int WritebackChromaVTaps, + unsigned int WritebackDestinationWidth); + +static void dml20_DisplayPipeConfiguration(struct display_mode_lib *mode_lib); +static void dml20_DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation( + struct display_mode_lib *mode_lib); + +void dml20_recalculate(struct display_mode_lib *mode_lib) +{ + ModeSupportAndSystemConfiguration(mode_lib); + mode_lib->vba.FabricAndDRAMBandwidth = dml_min( + mode_lib->vba.DRAMSpeed * mode_lib->vba.NumberOfChannels * mode_lib->vba.DRAMChannelWidth, + mode_lib->vba.FabricClock * mode_lib->vba.FabricDatapathToDCNDataReturn) / 1000.0; + PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib); + dml20_DisplayPipeConfiguration(mode_lib); + dml20_DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib); +} + +static double adjust_ReturnBW( + struct display_mode_lib *mode_lib, + double ReturnBW, + bool DCCEnabledAnyPlane, + double ReturnBandwidthToDCN) +{ + double CriticalCompression; + + if (DCCEnabledAnyPlane + && ReturnBandwidthToDCN + > mode_lib->vba.DCFCLK * mode_lib->vba.ReturnBusWidth / 4.0) + ReturnBW = + dml_min( + ReturnBW, + ReturnBandwidthToDCN * 4 + * (1.0 + - mode_lib->vba.UrgentLatencyPixelDataOnly + / ((mode_lib->vba.ROBBufferSizeInKByte + - mode_lib->vba.PixelChunkSizeInKByte) + * 1024 + / ReturnBandwidthToDCN + - mode_lib->vba.DCFCLK + * mode_lib->vba.ReturnBusWidth + / 4) + + mode_lib->vba.UrgentLatencyPixelDataOnly)); + + CriticalCompression = 2.0 * mode_lib->vba.ReturnBusWidth * mode_lib->vba.DCFCLK + * mode_lib->vba.UrgentLatencyPixelDataOnly + / (ReturnBandwidthToDCN * mode_lib->vba.UrgentLatencyPixelDataOnly + + (mode_lib->vba.ROBBufferSizeInKByte + - mode_lib->vba.PixelChunkSizeInKByte) + * 1024); + + if (DCCEnabledAnyPlane && CriticalCompression > 1.0 && CriticalCompression < 4.0) + ReturnBW = + dml_min( + ReturnBW, + 4.0 * ReturnBandwidthToDCN + * (mode_lib->vba.ROBBufferSizeInKByte + - mode_lib->vba.PixelChunkSizeInKByte) + * 1024 + * mode_lib->vba.ReturnBusWidth + * mode_lib->vba.DCFCLK + * mode_lib->vba.UrgentLatencyPixelDataOnly + / dml_pow( + (ReturnBandwidthToDCN + * mode_lib->vba.UrgentLatencyPixelDataOnly + + (mode_lib->vba.ROBBufferSizeInKByte + - mode_lib->vba.PixelChunkSizeInKByte) + * 1024), + 2)); + + return ReturnBW; +} + +static unsigned int dscceComputeDelay( + unsigned int bpc, + double bpp, + unsigned int sliceWidth, + unsigned int numSlices, + enum output_format_class pixelFormat) +{ + // valid bpc = source bits per component in the set of {8, 10, 12} + // valid bpp = increments of 1/16 of a bit + // min = 6/7/8 in N420/N422/444, respectively + // max = such that compression is 1:1 + //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode) + //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4} + //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420} + + // fixed value + unsigned int rcModelSize = 8192; + + // N422/N420 operate at 2 pixels per clock + unsigned int pixelsPerClock, lstall, D, initalXmitDelay, w, s, ix, wx, p, l0, a, ax, l, + Delay, pixels; + + if (pixelFormat == dm_n422 || pixelFormat == dm_420) + pixelsPerClock = 2; + // #all other modes operate at 1 pixel per clock + else + pixelsPerClock = 1; + + //initial transmit delay as per PPS + initalXmitDelay = dml_round(rcModelSize / 2.0 / bpp / pixelsPerClock); + + //compute ssm delay + if (bpc == 8) + D = 81; + else if (bpc == 10) + D = 89; + else + D = 113; + + //divide by pixel per cycle to compute slice width as seen by DSC + w = sliceWidth / pixelsPerClock; + + //422 mode has an additional cycle of delay + if (pixelFormat == dm_s422) + s = 1; + else + s = 0; + + //main calculation for the dscce + ix = initalXmitDelay + 45; + wx = (w + 2) / 3; + p = 3 * wx - w; + l0 = ix / w; + a = ix + p * l0; + ax = (a + 2) / 3 + D + 6 + 1; + l = (ax + wx - 1) / wx; + if ((ix % w) == 0 && p != 0) + lstall = 1; + else + lstall = 0; + Delay = l * wx * (numSlices - 1) + ax + s + lstall + 22; + + //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels + pixels = Delay * 3 * pixelsPerClock; + return pixels; +} + +static unsigned int dscComputeDelay(enum output_format_class pixelFormat) +{ + unsigned int Delay = 0; + + if (pixelFormat == dm_420) { + // sfr + Delay = Delay + 2; + // dsccif + Delay = Delay + 0; + // dscc - input deserializer + Delay = Delay + 3; + // dscc gets pixels every other cycle + Delay = Delay + 2; + // dscc - input cdc fifo + Delay = Delay + 12; + // dscc gets pixels every other cycle + Delay = Delay + 13; + // dscc - cdc uncertainty + Delay = Delay + 2; + // dscc - output cdc fifo + Delay = Delay + 7; + // dscc gets pixels every other cycle + Delay = Delay + 3; + // dscc - cdc uncertainty + Delay = Delay + 2; + // dscc - output serializer + Delay = Delay + 1; + // sft + Delay = Delay + 1; + } else if (pixelFormat == dm_n422) { + // sfr + Delay = Delay + 2; + // dsccif + Delay = Delay + 1; + // dscc - input deserializer + Delay = Delay + 5; + // dscc - input cdc fifo + Delay = Delay + 25; + // dscc - cdc uncertainty + Delay = Delay + 2; + // dscc - output cdc fifo + Delay = Delay + 10; + // dscc - cdc uncertainty + Delay = Delay + 2; + // dscc - output serializer + Delay = Delay + 1; + // sft + Delay = Delay + 1; + } else { + // sfr + Delay = Delay + 2; + // dsccif + Delay = Delay + 0; + // dscc - input deserializer + Delay = Delay + 3; + // dscc - input cdc fifo + Delay = Delay + 12; + // dscc - cdc uncertainty + Delay = Delay + 2; + // dscc - output cdc fifo + Delay = Delay + 7; + // dscc - output serializer + Delay = Delay + 1; + // dscc - cdc uncertainty + Delay = Delay + 2; + // sft + Delay = Delay + 1; + } + + return Delay; +} + +static bool CalculatePrefetchSchedule( + struct display_mode_lib *mode_lib, + double DPPCLK, + double DISPCLK, + double PixelClock, + double DCFCLKDeepSleep, + unsigned int DSCDelay, + unsigned int DPPPerPlane, + bool ScalerEnabled, + unsigned int NumberOfCursors, + double DPPCLKDelaySubtotal, + double DPPCLKDelaySCL, + double DPPCLKDelaySCLLBOnly, + double DPPCLKDelayCNVCFormater, + double DPPCLKDelayCNVCCursor, + double DISPCLKDelaySubtotal, + unsigned int ScalerRecoutWidth, + enum output_format_class OutputFormat, + unsigned int VBlank, + unsigned int HTotal, + unsigned int MaxInterDCNTileRepeaters, + unsigned int VStartup, + unsigned int PageTableLevels, + bool GPUVMEnable, + bool DynamicMetadataEnable, + unsigned int DynamicMetadataLinesBeforeActiveRequired, + unsigned int DynamicMetadataTransmittedBytes, + bool DCCEnable, + double UrgentLatencyPixelDataOnly, + double UrgentExtraLatency, + double TCalc, + unsigned int PDEAndMetaPTEBytesFrame, + unsigned int MetaRowByte, + unsigned int PixelPTEBytesPerRow, + double PrefetchSourceLinesY, + unsigned int SwathWidthY, + double BytePerPixelDETY, + double VInitPreFillY, + unsigned int MaxNumSwathY, + double PrefetchSourceLinesC, + double BytePerPixelDETC, + double VInitPreFillC, + unsigned int MaxNumSwathC, + unsigned int SwathHeightY, + unsigned int SwathHeightC, + double TWait, + bool XFCEnabled, + double XFCRemoteSurfaceFlipDelay, + bool InterlaceEnable, + bool ProgressiveToInterlaceUnitInOPP, + double *DSTXAfterScaler, + double *DSTYAfterScaler, + double *DestinationLinesForPrefetch, + double *PrefetchBandwidth, + double *DestinationLinesToRequestVMInVBlank, + double *DestinationLinesToRequestRowInVBlank, + double *VRatioPrefetchY, + double *VRatioPrefetchC, + double *RequiredPrefetchPixDataBW, + unsigned int *VStartupRequiredWhenNotEnoughTimeForDynamicMetadata, + double *Tno_bw, + unsigned int *VUpdateOffsetPix, + double *VUpdateWidthPix, + double *VReadyOffsetPix) +{ + bool MyError = false; + unsigned int DPPCycles, DISPCLKCycles; + double DSTTotalPixelsAfterScaler, TotalRepeaterDelayTime; + double Tdm, LineTime, Tsetup; + double dst_y_prefetch_equ; + double Tsw_oto; + double prefetch_bw_oto; + double Tvm_oto; + double Tr0_oto; + double Tpre_oto; + double dst_y_prefetch_oto; + double TimeForFetchingMetaPTE = 0; + double TimeForFetchingRowInVBlank = 0; + double LinesToRequestPrefetchPixelData = 0; + + if (ScalerEnabled) + DPPCycles = DPPCLKDelaySubtotal + DPPCLKDelaySCL; + else + DPPCycles = DPPCLKDelaySubtotal + DPPCLKDelaySCLLBOnly; + + DPPCycles = DPPCycles + DPPCLKDelayCNVCFormater + NumberOfCursors * DPPCLKDelayCNVCCursor; + + DISPCLKCycles = DISPCLKDelaySubtotal; + + if (DPPCLK == 0.0 || DISPCLK == 0.0) + return true; + + *DSTXAfterScaler = DPPCycles * PixelClock / DPPCLK + DISPCLKCycles * PixelClock / DISPCLK + + DSCDelay; + + if (DPPPerPlane > 1) + *DSTXAfterScaler = *DSTXAfterScaler + ScalerRecoutWidth; + + if (OutputFormat == dm_420 || (InterlaceEnable && ProgressiveToInterlaceUnitInOPP)) + *DSTYAfterScaler = 1; + else + *DSTYAfterScaler = 0; + + DSTTotalPixelsAfterScaler = ((double) (*DSTYAfterScaler * HTotal)) + *DSTXAfterScaler; + *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / HTotal, 1); + *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * HTotal)); + + *VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1); + TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2.0 / DPPCLK + 3.0 / DISPCLK); + *VUpdateWidthPix = (14.0 / DCFCLKDeepSleep + 12.0 / DPPCLK + TotalRepeaterDelayTime) + * PixelClock; + + *VReadyOffsetPix = dml_max( + 150.0 / DPPCLK, + TotalRepeaterDelayTime + 20.0 / DCFCLKDeepSleep + 10.0 / DPPCLK) + * PixelClock; + + Tsetup = (double) (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock; + + LineTime = (double) HTotal / PixelClock; + + if (DynamicMetadataEnable) { + double Tdmbf, Tdmec, Tdmsks; + + Tdm = dml_max(0.0, UrgentExtraLatency - TCalc); + Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK; + Tdmec = LineTime; + if (DynamicMetadataLinesBeforeActiveRequired == 0) + Tdmsks = VBlank * LineTime / 2.0; + else + Tdmsks = DynamicMetadataLinesBeforeActiveRequired * LineTime; + if (InterlaceEnable && !ProgressiveToInterlaceUnitInOPP) + Tdmsks = Tdmsks / 2; + if (VStartup * LineTime + < Tsetup + TWait + UrgentExtraLatency + Tdmbf + Tdmec + Tdmsks) { + MyError = true; + *VStartupRequiredWhenNotEnoughTimeForDynamicMetadata = (Tsetup + TWait + + UrgentExtraLatency + Tdmbf + Tdmec + Tdmsks) / LineTime; + } else + *VStartupRequiredWhenNotEnoughTimeForDynamicMetadata = 0.0; + } else + Tdm = 0; + + if (GPUVMEnable) { + if (PageTableLevels == 4) + *Tno_bw = UrgentExtraLatency + UrgentLatencyPixelDataOnly; + else if (PageTableLevels == 3) + *Tno_bw = UrgentExtraLatency; + else + *Tno_bw = 0; + } else if (DCCEnable) + *Tno_bw = LineTime; + else + *Tno_bw = LineTime / 4; + + dst_y_prefetch_equ = VStartup - dml_max(TCalc + TWait, XFCRemoteSurfaceFlipDelay) / LineTime + - (Tsetup + Tdm) / LineTime + - (*DSTYAfterScaler + *DSTXAfterScaler / HTotal); + + Tsw_oto = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime; + + prefetch_bw_oto = (MetaRowByte + PixelPTEBytesPerRow + + PrefetchSourceLinesY * SwathWidthY * dml_ceil(BytePerPixelDETY, 1) + + PrefetchSourceLinesC * SwathWidthY / 2 * dml_ceil(BytePerPixelDETC, 2)) + / Tsw_oto; + + if (GPUVMEnable == true) { + Tvm_oto = + dml_max( + *Tno_bw + PDEAndMetaPTEBytesFrame / prefetch_bw_oto, + dml_max( + UrgentExtraLatency + + UrgentLatencyPixelDataOnly + * (PageTableLevels + - 1), + LineTime / 4.0)); + } else + Tvm_oto = LineTime / 4.0; + + if ((GPUVMEnable == true || DCCEnable == true)) { + Tr0_oto = dml_max( + (MetaRowByte + PixelPTEBytesPerRow) / prefetch_bw_oto, + dml_max(UrgentLatencyPixelDataOnly, dml_max(LineTime - Tvm_oto, LineTime / 4))); + } else + Tr0_oto = LineTime - Tvm_oto; + + Tpre_oto = Tvm_oto + Tr0_oto + Tsw_oto; + + dst_y_prefetch_oto = Tpre_oto / LineTime; + + if (dst_y_prefetch_oto < dst_y_prefetch_equ) + *DestinationLinesForPrefetch = dst_y_prefetch_oto; + else + *DestinationLinesForPrefetch = dst_y_prefetch_equ; + + *DestinationLinesForPrefetch = dml_floor(4.0 * (*DestinationLinesForPrefetch + 0.125), 1) + / 4; + + dml_print("DML: VStartup: %d\n", VStartup); + dml_print("DML: TCalc: %f\n", TCalc); + dml_print("DML: TWait: %f\n", TWait); + dml_print("DML: XFCRemoteSurfaceFlipDelay: %f\n", XFCRemoteSurfaceFlipDelay); + dml_print("DML: LineTime: %f\n", LineTime); + dml_print("DML: Tsetup: %f\n", Tsetup); + dml_print("DML: Tdm: %f\n", Tdm); + dml_print("DML: DSTYAfterScaler: %f\n", *DSTYAfterScaler); + dml_print("DML: DSTXAfterScaler: %f\n", *DSTXAfterScaler); + dml_print("DML: HTotal: %d\n", HTotal); + + *PrefetchBandwidth = 0; + *DestinationLinesToRequestVMInVBlank = 0; + *DestinationLinesToRequestRowInVBlank = 0; + *VRatioPrefetchY = 0; + *VRatioPrefetchC = 0; + *RequiredPrefetchPixDataBW = 0; + if (*DestinationLinesForPrefetch > 1) { + *PrefetchBandwidth = (PDEAndMetaPTEBytesFrame + 2 * MetaRowByte + + 2 * PixelPTEBytesPerRow + + PrefetchSourceLinesY * SwathWidthY * dml_ceil(BytePerPixelDETY, 1) + + PrefetchSourceLinesC * SwathWidthY / 2 + * dml_ceil(BytePerPixelDETC, 2)) + / (*DestinationLinesForPrefetch * LineTime - *Tno_bw); + if (GPUVMEnable) { + TimeForFetchingMetaPTE = + dml_max( + *Tno_bw + + (double) PDEAndMetaPTEBytesFrame + / *PrefetchBandwidth, + dml_max( + UrgentExtraLatency + + UrgentLatencyPixelDataOnly + * (PageTableLevels + - 1), + LineTime / 4)); + } else { + if (NumberOfCursors > 0 || XFCEnabled) + TimeForFetchingMetaPTE = LineTime / 4; + else + TimeForFetchingMetaPTE = 0.0; + } + + if ((GPUVMEnable == true || DCCEnable == true)) { + TimeForFetchingRowInVBlank = + dml_max( + (MetaRowByte + PixelPTEBytesPerRow) + / *PrefetchBandwidth, + dml_max( + UrgentLatencyPixelDataOnly, + dml_max( + LineTime + - TimeForFetchingMetaPTE, + LineTime + / 4.0))); + } else { + if (NumberOfCursors > 0 || XFCEnabled) + TimeForFetchingRowInVBlank = LineTime - TimeForFetchingMetaPTE; + else + TimeForFetchingRowInVBlank = 0.0; + } + + *DestinationLinesToRequestVMInVBlank = dml_floor( + 4.0 * (TimeForFetchingMetaPTE / LineTime + 0.125), + 1) / 4.0; + + *DestinationLinesToRequestRowInVBlank = dml_floor( + 4.0 * (TimeForFetchingRowInVBlank / LineTime + 0.125), + 1) / 4.0; + + LinesToRequestPrefetchPixelData = + *DestinationLinesForPrefetch + - ((NumberOfCursors > 0 || GPUVMEnable + || DCCEnable) ? + (*DestinationLinesToRequestVMInVBlank + + *DestinationLinesToRequestRowInVBlank) : + 0.0); + + if (LinesToRequestPrefetchPixelData > 0) { + + *VRatioPrefetchY = (double) PrefetchSourceLinesY + / LinesToRequestPrefetchPixelData; + *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); + if ((SwathHeightY > 4) && (VInitPreFillY > 3)) { + if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) { + *VRatioPrefetchY = + dml_max( + (double) PrefetchSourceLinesY + / LinesToRequestPrefetchPixelData, + (double) MaxNumSwathY + * SwathHeightY + / (LinesToRequestPrefetchPixelData + - (VInitPreFillY + - 3.0) + / 2.0)); + *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); + } else { + MyError = true; + *VRatioPrefetchY = 0; + } + } + + *VRatioPrefetchC = (double) PrefetchSourceLinesC + / LinesToRequestPrefetchPixelData; + *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); + + if ((SwathHeightC > 4)) { + if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) { + *VRatioPrefetchC = + dml_max( + *VRatioPrefetchC, + (double) MaxNumSwathC + * SwathHeightC + / (LinesToRequestPrefetchPixelData + - (VInitPreFillC + - 3.0) + / 2.0)); + *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); + } else { + MyError = true; + *VRatioPrefetchC = 0; + } + } + + *RequiredPrefetchPixDataBW = + DPPPerPlane + * ((double) PrefetchSourceLinesY + / LinesToRequestPrefetchPixelData + * dml_ceil( + BytePerPixelDETY, + 1) + + (double) PrefetchSourceLinesC + / LinesToRequestPrefetchPixelData + * dml_ceil( + BytePerPixelDETC, + 2) + / 2) + * SwathWidthY / LineTime; + } else { + MyError = true; + *VRatioPrefetchY = 0; + *VRatioPrefetchC = 0; + *RequiredPrefetchPixDataBW = 0; + } + + } else { + MyError = true; + } + + if (MyError) { + *PrefetchBandwidth = 0; + TimeForFetchingMetaPTE = 0; + TimeForFetchingRowInVBlank = 0; + *DestinationLinesToRequestVMInVBlank = 0; + *DestinationLinesToRequestRowInVBlank = 0; + *DestinationLinesForPrefetch = 0; + LinesToRequestPrefetchPixelData = 0; + *VRatioPrefetchY = 0; + *VRatioPrefetchC = 0; + *RequiredPrefetchPixDataBW = 0; + } + + return MyError; +} + +static double RoundToDFSGranularityUp(double Clock, double VCOSpeed) +{ + return VCOSpeed * 4 / dml_floor(VCOSpeed * 4 / Clock, 1); +} + +static double RoundToDFSGranularityDown(double Clock, double VCOSpeed) +{ + return VCOSpeed * 4 / dml_ceil(VCOSpeed * 4 / Clock, 1); +} + +static double CalculatePrefetchSourceLines( + struct display_mode_lib *mode_lib, + double VRatio, + double vtaps, + bool Interlace, + bool ProgressiveToInterlaceUnitInOPP, + unsigned int SwathHeight, + unsigned int ViewportYStart, + double *VInitPreFill, + unsigned int *MaxNumSwath) +{ + unsigned int MaxPartialSwath; + + if (ProgressiveToInterlaceUnitInOPP) + *VInitPreFill = dml_floor((VRatio + vtaps + 1) / 2.0, 1); + else + *VInitPreFill = dml_floor((VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1); + + if (!mode_lib->vba.IgnoreViewportPositioning) { + + *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1.0; + + if (*VInitPreFill > 1.0) + MaxPartialSwath = (unsigned int) (*VInitPreFill - 2) % SwathHeight; + else + MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 2) + % SwathHeight; + MaxPartialSwath = dml_max(1U, MaxPartialSwath); + + } else { + + if (ViewportYStart != 0) + dml_print( + "WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n"); + + *MaxNumSwath = dml_ceil(*VInitPreFill / SwathHeight, 1); + + if (*VInitPreFill > 1.0) + MaxPartialSwath = (unsigned int) (*VInitPreFill - 1) % SwathHeight; + else + MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 1) + % SwathHeight; + } + + return *MaxNumSwath * SwathHeight + MaxPartialSwath; +} + +static unsigned int CalculateVMAndRowBytes( + struct display_mode_lib *mode_lib, + bool DCCEnable, + unsigned int BlockHeight256Bytes, + unsigned int BlockWidth256Bytes, + enum source_format_class SourcePixelFormat, + unsigned int SurfaceTiling, + unsigned int BytePerPixel, + enum scan_direction_class ScanDirection, + unsigned int ViewportWidth, + unsigned int ViewportHeight, + unsigned int SwathWidth, + bool GPUVMEnable, + unsigned int VMMPageSize, + unsigned int PTEBufferSizeInRequestsLuma, + unsigned int PDEProcessingBufIn64KBReqs, + unsigned int Pitch, + unsigned int DCCMetaPitch, + unsigned int *MacroTileWidth, + unsigned int *MetaRowByte, + unsigned int *PixelPTEBytesPerRow, + bool *PTEBufferSizeNotExceeded, + unsigned int *dpte_row_height, + unsigned int *meta_row_height) +{ + unsigned int MetaRequestHeight; + unsigned int MetaRequestWidth; + unsigned int MetaSurfWidth; + unsigned int MetaSurfHeight; + unsigned int MPDEBytesFrame; + unsigned int MetaPTEBytesFrame; + unsigned int DCCMetaSurfaceBytes; + + unsigned int MacroTileSizeBytes; + unsigned int MacroTileHeight; + unsigned int DPDE0BytesFrame; + unsigned int ExtraDPDEBytesFrame; + unsigned int PDEAndMetaPTEBytesFrame; + + if (DCCEnable == true) { + MetaRequestHeight = 8 * BlockHeight256Bytes; + MetaRequestWidth = 8 * BlockWidth256Bytes; + if (ScanDirection == dm_horz) { + *meta_row_height = MetaRequestHeight; + MetaSurfWidth = dml_ceil((double) SwathWidth - 1, MetaRequestWidth) + + MetaRequestWidth; + *MetaRowByte = MetaSurfWidth * MetaRequestHeight * BytePerPixel / 256.0; + } else { + *meta_row_height = MetaRequestWidth; + MetaSurfHeight = dml_ceil((double) SwathWidth - 1, MetaRequestHeight) + + MetaRequestHeight; + *MetaRowByte = MetaSurfHeight * MetaRequestWidth * BytePerPixel / 256.0; + } + if (ScanDirection == dm_horz) { + DCCMetaSurfaceBytes = DCCMetaPitch + * (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + + 64 * BlockHeight256Bytes) * BytePerPixel + / 256; + } else { + DCCMetaSurfaceBytes = DCCMetaPitch + * (dml_ceil( + (double) ViewportHeight - 1, + 64 * BlockHeight256Bytes) + + 64 * BlockHeight256Bytes) * BytePerPixel + / 256; + } + if (GPUVMEnable == true) { + MetaPTEBytesFrame = (dml_ceil( + (double) (DCCMetaSurfaceBytes - VMMPageSize) + / (8 * VMMPageSize), + 1) + 1) * 64; + MPDEBytesFrame = 128 * (mode_lib->vba.GPUVMMaxPageTableLevels - 1); + } else { + MetaPTEBytesFrame = 0; + MPDEBytesFrame = 0; + } + } else { + MetaPTEBytesFrame = 0; + MPDEBytesFrame = 0; + *MetaRowByte = 0; + } + + if (SurfaceTiling == dm_sw_linear || SurfaceTiling == dm_sw_gfx7_2d_thin_gl || SurfaceTiling == dm_sw_gfx7_2d_thin_l_vp) { + MacroTileSizeBytes = 256; + MacroTileHeight = BlockHeight256Bytes; + } else if (SurfaceTiling == dm_sw_4kb_s || SurfaceTiling == dm_sw_4kb_s_x + || SurfaceTiling == dm_sw_4kb_d || SurfaceTiling == dm_sw_4kb_d_x) { + MacroTileSizeBytes = 4096; + MacroTileHeight = 4 * BlockHeight256Bytes; + } else if (SurfaceTiling == dm_sw_64kb_s || SurfaceTiling == dm_sw_64kb_s_t + || SurfaceTiling == dm_sw_64kb_s_x || SurfaceTiling == dm_sw_64kb_d + || SurfaceTiling == dm_sw_64kb_d_t || SurfaceTiling == dm_sw_64kb_d_x + || SurfaceTiling == dm_sw_64kb_r_x) { + MacroTileSizeBytes = 65536; + MacroTileHeight = 16 * BlockHeight256Bytes; + } else { + MacroTileSizeBytes = 262144; + MacroTileHeight = 32 * BlockHeight256Bytes; + } + *MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight; + + if (GPUVMEnable == true && mode_lib->vba.GPUVMMaxPageTableLevels > 1) { + if (ScanDirection == dm_horz) { + DPDE0BytesFrame = + 64 + * (dml_ceil( + ((Pitch + * (dml_ceil( + ViewportHeight + - 1, + MacroTileHeight) + + MacroTileHeight) + * BytePerPixel) + - MacroTileSizeBytes) + / (8 + * 2097152), + 1) + 1); + } else { + DPDE0BytesFrame = + 64 + * (dml_ceil( + ((Pitch + * (dml_ceil( + (double) SwathWidth + - 1, + MacroTileHeight) + + MacroTileHeight) + * BytePerPixel) + - MacroTileSizeBytes) + / (8 + * 2097152), + 1) + 1); + } + ExtraDPDEBytesFrame = 128 * (mode_lib->vba.GPUVMMaxPageTableLevels - 2); + } else { + DPDE0BytesFrame = 0; + ExtraDPDEBytesFrame = 0; + } + + PDEAndMetaPTEBytesFrame = MetaPTEBytesFrame + MPDEBytesFrame + DPDE0BytesFrame + + ExtraDPDEBytesFrame; + + if (GPUVMEnable == true) { + unsigned int PTERequestSize; + unsigned int PixelPTEReqHeight; + unsigned int PixelPTEReqWidth; + double FractionOfPTEReturnDrop; + unsigned int EffectivePDEProcessingBufIn64KBReqs; + + if (SurfaceTiling == dm_sw_linear) { + PixelPTEReqHeight = 1; + PixelPTEReqWidth = 8.0 * VMMPageSize / BytePerPixel; + PTERequestSize = 64; + FractionOfPTEReturnDrop = 0; + } else if (MacroTileSizeBytes == 4096) { + PixelPTEReqHeight = MacroTileHeight; + PixelPTEReqWidth = 8 * *MacroTileWidth; + PTERequestSize = 64; + if (ScanDirection == dm_horz) + FractionOfPTEReturnDrop = 0; + else + FractionOfPTEReturnDrop = 7 / 8; + } else if (VMMPageSize == 4096 && MacroTileSizeBytes > 4096) { + PixelPTEReqHeight = 16 * BlockHeight256Bytes; + PixelPTEReqWidth = 16 * BlockWidth256Bytes; + PTERequestSize = 128; + FractionOfPTEReturnDrop = 0; + } else { + PixelPTEReqHeight = MacroTileHeight; + PixelPTEReqWidth = 8 * *MacroTileWidth; + PTERequestSize = 64; + FractionOfPTEReturnDrop = 0; + } + + if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10) + EffectivePDEProcessingBufIn64KBReqs = PDEProcessingBufIn64KBReqs / 2; + else + EffectivePDEProcessingBufIn64KBReqs = PDEProcessingBufIn64KBReqs; + + if (SurfaceTiling == dm_sw_linear) { + *dpte_row_height = + dml_min( + 128, + 1 + << (unsigned int) dml_floor( + dml_log2( + dml_min( + (double) PTEBufferSizeInRequestsLuma + * PixelPTEReqWidth, + EffectivePDEProcessingBufIn64KBReqs + * 65536.0 + / BytePerPixel) + / Pitch), + 1)); + *PixelPTEBytesPerRow = PTERequestSize + * (dml_ceil( + (double) (Pitch * *dpte_row_height - 1) + / PixelPTEReqWidth, + 1) + 1); + } else if (ScanDirection == dm_horz) { + *dpte_row_height = PixelPTEReqHeight; + *PixelPTEBytesPerRow = PTERequestSize + * (dml_ceil(((double) SwathWidth - 1) / PixelPTEReqWidth, 1) + + 1); + } else { + *dpte_row_height = dml_min(PixelPTEReqWidth, *MacroTileWidth); + *PixelPTEBytesPerRow = PTERequestSize + * (dml_ceil( + ((double) SwathWidth - 1) + / PixelPTEReqHeight, + 1) + 1); + } + if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop) + <= 64 * PTEBufferSizeInRequestsLuma) { + *PTEBufferSizeNotExceeded = true; + } else { + *PTEBufferSizeNotExceeded = false; + } + } else { + *PixelPTEBytesPerRow = 0; + *PTEBufferSizeNotExceeded = true; + } + + return PDEAndMetaPTEBytesFrame; +} + +static void dml20_DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation( + struct display_mode_lib *mode_lib) +{ + unsigned int j, k; + + mode_lib->vba.WritebackDISPCLK = 0.0; + mode_lib->vba.DISPCLKWithRamping = 0; + mode_lib->vba.DISPCLKWithoutRamping = 0; + mode_lib->vba.GlobalDPPCLK = 0.0; + + // dml_ml->vba.DISPCLK and dml_ml->vba.DPPCLK Calculation + // + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.WritebackEnable[k]) { + mode_lib->vba.WritebackDISPCLK = + dml_max( + mode_lib->vba.WritebackDISPCLK, + CalculateWriteBackDISPCLK( + mode_lib->vba.WritebackPixelFormat[k], + mode_lib->vba.PixelClock[k], + mode_lib->vba.WritebackHRatio[k], + mode_lib->vba.WritebackVRatio[k], + mode_lib->vba.WritebackLumaHTaps[k], + mode_lib->vba.WritebackLumaVTaps[k], + mode_lib->vba.WritebackChromaHTaps[k], + mode_lib->vba.WritebackChromaVTaps[k], + mode_lib->vba.WritebackDestinationWidth[k], + mode_lib->vba.HTotal[k], + mode_lib->vba.WritebackChromaLineBufferWidth)); + } + } + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.HRatio[k] > 1) { + mode_lib->vba.PSCL_THROUGHPUT_LUMA[k] = dml_min( + mode_lib->vba.MaxDCHUBToPSCLThroughput, + mode_lib->vba.MaxPSCLToLBThroughput + * mode_lib->vba.HRatio[k] + / dml_ceil( + mode_lib->vba.htaps[k] + / 6.0, + 1)); + } else { + mode_lib->vba.PSCL_THROUGHPUT_LUMA[k] = dml_min( + mode_lib->vba.MaxDCHUBToPSCLThroughput, + mode_lib->vba.MaxPSCLToLBThroughput); + } + + mode_lib->vba.DPPCLKUsingSingleDPPLuma = + mode_lib->vba.PixelClock[k] + * dml_max( + mode_lib->vba.vtaps[k] / 6.0 + * dml_min( + 1.0, + mode_lib->vba.HRatio[k]), + dml_max( + mode_lib->vba.HRatio[k] + * mode_lib->vba.VRatio[k] + / mode_lib->vba.PSCL_THROUGHPUT_LUMA[k], + 1.0)); + + if ((mode_lib->vba.htaps[k] > 6 || mode_lib->vba.vtaps[k] > 6) + && mode_lib->vba.DPPCLKUsingSingleDPPLuma + < 2 * mode_lib->vba.PixelClock[k]) { + mode_lib->vba.DPPCLKUsingSingleDPPLuma = 2 * mode_lib->vba.PixelClock[k]; + } + + if ((mode_lib->vba.SourcePixelFormat[k] != dm_420_8 + && mode_lib->vba.SourcePixelFormat[k] != dm_420_10)) { + mode_lib->vba.PSCL_THROUGHPUT_CHROMA[k] = 0.0; + mode_lib->vba.DPPCLKUsingSingleDPP[k] = + mode_lib->vba.DPPCLKUsingSingleDPPLuma; + } else { + if (mode_lib->vba.HRatio[k] > 1) { + mode_lib->vba.PSCL_THROUGHPUT_CHROMA[k] = + dml_min( + mode_lib->vba.MaxDCHUBToPSCLThroughput, + mode_lib->vba.MaxPSCLToLBThroughput + * mode_lib->vba.HRatio[k] + / 2 + / dml_ceil( + mode_lib->vba.HTAPsChroma[k] + / 6.0, + 1.0)); + } else { + mode_lib->vba.PSCL_THROUGHPUT_CHROMA[k] = dml_min( + mode_lib->vba.MaxDCHUBToPSCLThroughput, + mode_lib->vba.MaxPSCLToLBThroughput); + } + mode_lib->vba.DPPCLKUsingSingleDPPChroma = + mode_lib->vba.PixelClock[k] + * dml_max( + mode_lib->vba.VTAPsChroma[k] + / 6.0 + * dml_min( + 1.0, + mode_lib->vba.HRatio[k] + / 2), + dml_max( + mode_lib->vba.HRatio[k] + * mode_lib->vba.VRatio[k] + / 4 + / mode_lib->vba.PSCL_THROUGHPUT_CHROMA[k], + 1.0)); + + if ((mode_lib->vba.HTAPsChroma[k] > 6 || mode_lib->vba.VTAPsChroma[k] > 6) + && mode_lib->vba.DPPCLKUsingSingleDPPChroma + < 2 * mode_lib->vba.PixelClock[k]) { + mode_lib->vba.DPPCLKUsingSingleDPPChroma = 2 + * mode_lib->vba.PixelClock[k]; + } + + mode_lib->vba.DPPCLKUsingSingleDPP[k] = dml_max( + mode_lib->vba.DPPCLKUsingSingleDPPLuma, + mode_lib->vba.DPPCLKUsingSingleDPPChroma); + } + } + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.BlendingAndTiming[k] != k) + continue; + if (mode_lib->vba.ODMCombineEnabled[k]) { + mode_lib->vba.DISPCLKWithRamping = + dml_max( + mode_lib->vba.DISPCLKWithRamping, + mode_lib->vba.PixelClock[k] / 2 + * (1 + + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading + / 100) + * (1 + + mode_lib->vba.DISPCLKRampingMargin + / 100)); + mode_lib->vba.DISPCLKWithoutRamping = + dml_max( + mode_lib->vba.DISPCLKWithoutRamping, + mode_lib->vba.PixelClock[k] / 2 + * (1 + + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading + / 100)); + } else if (!mode_lib->vba.ODMCombineEnabled[k]) { + mode_lib->vba.DISPCLKWithRamping = + dml_max( + mode_lib->vba.DISPCLKWithRamping, + mode_lib->vba.PixelClock[k] + * (1 + + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading + / 100) + * (1 + + mode_lib->vba.DISPCLKRampingMargin + / 100)); + mode_lib->vba.DISPCLKWithoutRamping = + dml_max( + mode_lib->vba.DISPCLKWithoutRamping, + mode_lib->vba.PixelClock[k] + * (1 + + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading + / 100)); + } + } + + mode_lib->vba.DISPCLKWithRamping = dml_max( + mode_lib->vba.DISPCLKWithRamping, + mode_lib->vba.WritebackDISPCLK); + mode_lib->vba.DISPCLKWithoutRamping = dml_max( + mode_lib->vba.DISPCLKWithoutRamping, + mode_lib->vba.WritebackDISPCLK); + + ASSERT(mode_lib->vba.DISPCLKDPPCLKVCOSpeed != 0); + mode_lib->vba.DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp( + mode_lib->vba.DISPCLKWithRamping, + mode_lib->vba.DISPCLKDPPCLKVCOSpeed); + mode_lib->vba.DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp( + mode_lib->vba.DISPCLKWithoutRamping, + mode_lib->vba.DISPCLKDPPCLKVCOSpeed); + mode_lib->vba.MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown( + mode_lib->vba.soc.clock_limits[mode_lib->vba.soc.num_states].dispclk_mhz, + mode_lib->vba.DISPCLKDPPCLKVCOSpeed); + if (mode_lib->vba.DISPCLKWithoutRampingRoundedToDFSGranularity + > mode_lib->vba.MaxDispclkRoundedToDFSGranularity) { + mode_lib->vba.DISPCLK_calculated = + mode_lib->vba.DISPCLKWithoutRampingRoundedToDFSGranularity; + } else if (mode_lib->vba.DISPCLKWithRampingRoundedToDFSGranularity + > mode_lib->vba.MaxDispclkRoundedToDFSGranularity) { + mode_lib->vba.DISPCLK_calculated = mode_lib->vba.MaxDispclkRoundedToDFSGranularity; + } else { + mode_lib->vba.DISPCLK_calculated = + mode_lib->vba.DISPCLKWithRampingRoundedToDFSGranularity; + } + DTRACE(" dispclk_mhz (calculated) = %f", mode_lib->vba.DISPCLK_calculated); + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.DPPPerPlane[k] == 0) { + mode_lib->vba.DPPCLK_calculated[k] = 0; + } else { + mode_lib->vba.DPPCLK_calculated[k] = mode_lib->vba.DPPCLKUsingSingleDPP[k] + / mode_lib->vba.DPPPerPlane[k] + * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100); + } + mode_lib->vba.GlobalDPPCLK = dml_max( + mode_lib->vba.GlobalDPPCLK, + mode_lib->vba.DPPCLK_calculated[k]); + } + mode_lib->vba.GlobalDPPCLK = RoundToDFSGranularityUp( + mode_lib->vba.GlobalDPPCLK, + mode_lib->vba.DISPCLKDPPCLKVCOSpeed); + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + mode_lib->vba.DPPCLK_calculated[k] = mode_lib->vba.GlobalDPPCLK / 255 + * dml_ceil( + mode_lib->vba.DPPCLK_calculated[k] * 255 + / mode_lib->vba.GlobalDPPCLK, + 1); + DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, mode_lib->vba.DPPCLK_calculated[k]); + } + + // Urgent Watermark + mode_lib->vba.DCCEnabledAnyPlane = false; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) + if (mode_lib->vba.DCCEnable[k]) + mode_lib->vba.DCCEnabledAnyPlane = true; + + mode_lib->vba.ReturnBandwidthToDCN = dml_min( + mode_lib->vba.ReturnBusWidth * mode_lib->vba.DCFCLK, + mode_lib->vba.FabricAndDRAMBandwidth * 1000) + * mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly / 100; + + mode_lib->vba.ReturnBW = mode_lib->vba.ReturnBandwidthToDCN; + mode_lib->vba.ReturnBW = adjust_ReturnBW( + mode_lib, + mode_lib->vba.ReturnBW, + mode_lib->vba.DCCEnabledAnyPlane, + mode_lib->vba.ReturnBandwidthToDCN); + + // Let's do this calculation again?? + mode_lib->vba.ReturnBandwidthToDCN = dml_min( + mode_lib->vba.ReturnBusWidth * mode_lib->vba.DCFCLK, + mode_lib->vba.FabricAndDRAMBandwidth * 1000); + mode_lib->vba.ReturnBW = adjust_ReturnBW( + mode_lib, + mode_lib->vba.ReturnBW, + mode_lib->vba.DCCEnabledAnyPlane, + mode_lib->vba.ReturnBandwidthToDCN); + + DTRACE(" dcfclk_mhz = %f", mode_lib->vba.DCFCLK); + DTRACE(" return_bw_to_dcn = %f", mode_lib->vba.ReturnBandwidthToDCN); + DTRACE(" return_bus_bw = %f", mode_lib->vba.ReturnBW); + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + bool MainPlaneDoesODMCombine = false; + + if (mode_lib->vba.SourceScan[k] == dm_horz) + mode_lib->vba.SwathWidthSingleDPPY[k] = mode_lib->vba.ViewportWidth[k]; + else + mode_lib->vba.SwathWidthSingleDPPY[k] = mode_lib->vba.ViewportHeight[k]; + + if (mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) + MainPlaneDoesODMCombine = true; + for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) + if (mode_lib->vba.BlendingAndTiming[k] == j + && mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) + MainPlaneDoesODMCombine = true; + + if (MainPlaneDoesODMCombine == true) + mode_lib->vba.SwathWidthY[k] = dml_min( + (double) mode_lib->vba.SwathWidthSingleDPPY[k], + dml_round( + mode_lib->vba.HActive[k] / 2.0 + * mode_lib->vba.HRatio[k])); + else { + if (mode_lib->vba.DPPPerPlane[k] == 0) { + mode_lib->vba.SwathWidthY[k] = 0; + } else { + mode_lib->vba.SwathWidthY[k] = mode_lib->vba.SwathWidthSingleDPPY[k] + / mode_lib->vba.DPPPerPlane[k]; + } + } + } + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.SourcePixelFormat[k] == dm_444_64) { + mode_lib->vba.BytePerPixelDETY[k] = 8; + mode_lib->vba.BytePerPixelDETC[k] = 0; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_32) { + mode_lib->vba.BytePerPixelDETY[k] = 4; + mode_lib->vba.BytePerPixelDETC[k] = 0; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_16) { + mode_lib->vba.BytePerPixelDETY[k] = 2; + mode_lib->vba.BytePerPixelDETC[k] = 0; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_8) { + mode_lib->vba.BytePerPixelDETY[k] = 1; + mode_lib->vba.BytePerPixelDETC[k] = 0; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8) { + mode_lib->vba.BytePerPixelDETY[k] = 1; + mode_lib->vba.BytePerPixelDETC[k] = 2; + } else { // dm_420_10 + mode_lib->vba.BytePerPixelDETY[k] = 4.0 / 3.0; + mode_lib->vba.BytePerPixelDETC[k] = 8.0 / 3.0; + } + } + + mode_lib->vba.TotalDataReadBandwidth = 0.0; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + mode_lib->vba.ReadBandwidthPlaneLuma[k] = mode_lib->vba.SwathWidthSingleDPPY[k] + * dml_ceil(mode_lib->vba.BytePerPixelDETY[k], 1) + / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) + * mode_lib->vba.VRatio[k]; + mode_lib->vba.ReadBandwidthPlaneChroma[k] = mode_lib->vba.SwathWidthSingleDPPY[k] + / 2 * dml_ceil(mode_lib->vba.BytePerPixelDETC[k], 2) + / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) + * mode_lib->vba.VRatio[k] / 2; + DTRACE( + " read_bw[%i] = %fBps", + k, + mode_lib->vba.ReadBandwidthPlaneLuma[k] + + mode_lib->vba.ReadBandwidthPlaneChroma[k]); + mode_lib->vba.TotalDataReadBandwidth += mode_lib->vba.ReadBandwidthPlaneLuma[k] + + mode_lib->vba.ReadBandwidthPlaneChroma[k]; + } + + mode_lib->vba.TotalDCCActiveDPP = 0; + mode_lib->vba.TotalActiveDPP = 0; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + mode_lib->vba.TotalActiveDPP = mode_lib->vba.TotalActiveDPP + + mode_lib->vba.DPPPerPlane[k]; + if (mode_lib->vba.DCCEnable[k]) + mode_lib->vba.TotalDCCActiveDPP = mode_lib->vba.TotalDCCActiveDPP + + mode_lib->vba.DPPPerPlane[k]; + } + + mode_lib->vba.UrgentRoundTripAndOutOfOrderLatency = + (mode_lib->vba.RoundTripPingLatencyCycles + 32) / mode_lib->vba.DCFCLK + + mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelDataOnly + * mode_lib->vba.NumberOfChannels + / mode_lib->vba.ReturnBW; + + mode_lib->vba.LastPixelOfLineExtraWatermark = 0; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + double DataFabricLineDeliveryTimeLuma, DataFabricLineDeliveryTimeChroma; + + if (mode_lib->vba.VRatio[k] <= 1.0) + mode_lib->vba.DisplayPipeLineDeliveryTimeLuma[k] = + (double) mode_lib->vba.SwathWidthY[k] + * mode_lib->vba.DPPPerPlane[k] + / mode_lib->vba.HRatio[k] + / mode_lib->vba.PixelClock[k]; + else + mode_lib->vba.DisplayPipeLineDeliveryTimeLuma[k] = + (double) mode_lib->vba.SwathWidthY[k] + / mode_lib->vba.PSCL_THROUGHPUT_LUMA[k] + / mode_lib->vba.DPPCLK[k]; + + DataFabricLineDeliveryTimeLuma = mode_lib->vba.SwathWidthSingleDPPY[k] + * mode_lib->vba.SwathHeightY[k] + * dml_ceil(mode_lib->vba.BytePerPixelDETY[k], 1) + / (mode_lib->vba.ReturnBW * mode_lib->vba.ReadBandwidthPlaneLuma[k] + / mode_lib->vba.TotalDataReadBandwidth); + mode_lib->vba.LastPixelOfLineExtraWatermark = dml_max( + mode_lib->vba.LastPixelOfLineExtraWatermark, + DataFabricLineDeliveryTimeLuma + - mode_lib->vba.DisplayPipeLineDeliveryTimeLuma[k]); + + if (mode_lib->vba.BytePerPixelDETC[k] == 0) + mode_lib->vba.DisplayPipeLineDeliveryTimeChroma[k] = 0.0; + else if (mode_lib->vba.VRatio[k] / 2.0 <= 1.0) + mode_lib->vba.DisplayPipeLineDeliveryTimeChroma[k] = + mode_lib->vba.SwathWidthY[k] / 2.0 + * mode_lib->vba.DPPPerPlane[k] + / (mode_lib->vba.HRatio[k] / 2.0) + / mode_lib->vba.PixelClock[k]; + else + mode_lib->vba.DisplayPipeLineDeliveryTimeChroma[k] = + mode_lib->vba.SwathWidthY[k] / 2.0 + / mode_lib->vba.PSCL_THROUGHPUT_CHROMA[k] + / mode_lib->vba.DPPCLK[k]; + + DataFabricLineDeliveryTimeChroma = mode_lib->vba.SwathWidthSingleDPPY[k] / 2.0 + * mode_lib->vba.SwathHeightC[k] + * dml_ceil(mode_lib->vba.BytePerPixelDETC[k], 2) + / (mode_lib->vba.ReturnBW + * mode_lib->vba.ReadBandwidthPlaneChroma[k] + / mode_lib->vba.TotalDataReadBandwidth); + mode_lib->vba.LastPixelOfLineExtraWatermark = + dml_max( + mode_lib->vba.LastPixelOfLineExtraWatermark, + DataFabricLineDeliveryTimeChroma + - mode_lib->vba.DisplayPipeLineDeliveryTimeChroma[k]); + } + + mode_lib->vba.UrgentExtraLatency = mode_lib->vba.UrgentRoundTripAndOutOfOrderLatency + + (mode_lib->vba.TotalActiveDPP * mode_lib->vba.PixelChunkSizeInKByte + + mode_lib->vba.TotalDCCActiveDPP + * mode_lib->vba.MetaChunkSize) * 1024.0 + / mode_lib->vba.ReturnBW; + + if (mode_lib->vba.GPUVMEnable) + mode_lib->vba.UrgentExtraLatency += mode_lib->vba.TotalActiveDPP + * mode_lib->vba.PTEGroupSize / mode_lib->vba.ReturnBW; + + mode_lib->vba.UrgentWatermark = mode_lib->vba.UrgentLatencyPixelDataOnly + + mode_lib->vba.LastPixelOfLineExtraWatermark + + mode_lib->vba.UrgentExtraLatency; + + DTRACE(" urgent_extra_latency = %fus", mode_lib->vba.UrgentExtraLatency); + DTRACE(" wm_urgent = %fus", mode_lib->vba.UrgentWatermark); + + mode_lib->vba.UrgentLatency = mode_lib->vba.UrgentLatencyPixelDataOnly; + + mode_lib->vba.TotalActiveWriteback = 0; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.WritebackEnable[k]) + mode_lib->vba.TotalActiveWriteback = mode_lib->vba.TotalActiveWriteback + mode_lib->vba.ActiveWritebacksPerPlane[k]; + } + + if (mode_lib->vba.TotalActiveWriteback <= 1) + mode_lib->vba.WritebackUrgentWatermark = mode_lib->vba.WritebackLatency; + else + mode_lib->vba.WritebackUrgentWatermark = mode_lib->vba.WritebackLatency + + mode_lib->vba.WritebackChunkSize * 1024.0 / 32 + / mode_lib->vba.SOCCLK; + + DTRACE(" wm_wb_urgent = %fus", mode_lib->vba.WritebackUrgentWatermark); + + // NB P-State/DRAM Clock Change Watermark + mode_lib->vba.DRAMClockChangeWatermark = mode_lib->vba.DRAMClockChangeLatency + + mode_lib->vba.UrgentWatermark; + + DTRACE(" wm_pstate_change = %fus", mode_lib->vba.DRAMClockChangeWatermark); + + DTRACE(" calculating wb pstate watermark"); + DTRACE(" total wb outputs %d", mode_lib->vba.TotalActiveWriteback); + DTRACE(" socclk frequency %f Mhz", mode_lib->vba.SOCCLK); + + if (mode_lib->vba.TotalActiveWriteback <= 1) + mode_lib->vba.WritebackDRAMClockChangeWatermark = + mode_lib->vba.DRAMClockChangeLatency + + mode_lib->vba.WritebackLatency; + else + mode_lib->vba.WritebackDRAMClockChangeWatermark = + mode_lib->vba.DRAMClockChangeLatency + + mode_lib->vba.WritebackLatency + + mode_lib->vba.WritebackChunkSize * 1024.0 / 32 + / mode_lib->vba.SOCCLK; + + DTRACE(" wm_wb_pstate %fus", mode_lib->vba.WritebackDRAMClockChangeWatermark); + + // Stutter Efficiency + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + mode_lib->vba.LinesInDETY[k] = mode_lib->vba.DETBufferSizeY[k] + / mode_lib->vba.BytePerPixelDETY[k] / mode_lib->vba.SwathWidthY[k]; + mode_lib->vba.LinesInDETYRoundedDownToSwath[k] = dml_floor( + mode_lib->vba.LinesInDETY[k], + mode_lib->vba.SwathHeightY[k]); + mode_lib->vba.FullDETBufferingTimeY[k] = + mode_lib->vba.LinesInDETYRoundedDownToSwath[k] + * (mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]) + / mode_lib->vba.VRatio[k]; + if (mode_lib->vba.BytePerPixelDETC[k] > 0) { + mode_lib->vba.LinesInDETC[k] = mode_lib->vba.DETBufferSizeC[k] + / mode_lib->vba.BytePerPixelDETC[k] + / (mode_lib->vba.SwathWidthY[k] / 2); + mode_lib->vba.LinesInDETCRoundedDownToSwath[k] = dml_floor( + mode_lib->vba.LinesInDETC[k], + mode_lib->vba.SwathHeightC[k]); + mode_lib->vba.FullDETBufferingTimeC[k] = + mode_lib->vba.LinesInDETCRoundedDownToSwath[k] + * (mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]) + / (mode_lib->vba.VRatio[k] / 2); + } else { + mode_lib->vba.LinesInDETC[k] = 0; + mode_lib->vba.LinesInDETCRoundedDownToSwath[k] = 0; + mode_lib->vba.FullDETBufferingTimeC[k] = 999999; + } + } + + mode_lib->vba.MinFullDETBufferingTime = 999999.0; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.FullDETBufferingTimeY[k] + < mode_lib->vba.MinFullDETBufferingTime) { + mode_lib->vba.MinFullDETBufferingTime = + mode_lib->vba.FullDETBufferingTimeY[k]; + mode_lib->vba.FrameTimeForMinFullDETBufferingTime = + (double) mode_lib->vba.VTotal[k] * mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]; + } + if (mode_lib->vba.FullDETBufferingTimeC[k] + < mode_lib->vba.MinFullDETBufferingTime) { + mode_lib->vba.MinFullDETBufferingTime = + mode_lib->vba.FullDETBufferingTimeC[k]; + mode_lib->vba.FrameTimeForMinFullDETBufferingTime = + (double) mode_lib->vba.VTotal[k] * mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]; + } + } + + mode_lib->vba.AverageReadBandwidthGBytePerSecond = 0.0; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.DCCEnable[k]) { + mode_lib->vba.AverageReadBandwidthGBytePerSecond = + mode_lib->vba.AverageReadBandwidthGBytePerSecond + + mode_lib->vba.ReadBandwidthPlaneLuma[k] + / mode_lib->vba.DCCRate[k] + / 1000 + + mode_lib->vba.ReadBandwidthPlaneChroma[k] + / mode_lib->vba.DCCRate[k] + / 1000; + } else { + mode_lib->vba.AverageReadBandwidthGBytePerSecond = + mode_lib->vba.AverageReadBandwidthGBytePerSecond + + mode_lib->vba.ReadBandwidthPlaneLuma[k] + / 1000 + + mode_lib->vba.ReadBandwidthPlaneChroma[k] + / 1000; + } + if (mode_lib->vba.DCCEnable[k]) { + mode_lib->vba.AverageReadBandwidthGBytePerSecond = + mode_lib->vba.AverageReadBandwidthGBytePerSecond + + mode_lib->vba.ReadBandwidthPlaneLuma[k] + / 1000 / 256 + + mode_lib->vba.ReadBandwidthPlaneChroma[k] + / 1000 / 256; + } + if (mode_lib->vba.GPUVMEnable) { + mode_lib->vba.AverageReadBandwidthGBytePerSecond = + mode_lib->vba.AverageReadBandwidthGBytePerSecond + + mode_lib->vba.ReadBandwidthPlaneLuma[k] + / 1000 / 512 + + mode_lib->vba.ReadBandwidthPlaneChroma[k] + / 1000 / 512; + } + } + + mode_lib->vba.PartOfBurstThatFitsInROB = + dml_min( + mode_lib->vba.MinFullDETBufferingTime + * mode_lib->vba.TotalDataReadBandwidth, + mode_lib->vba.ROBBufferSizeInKByte * 1024 + * mode_lib->vba.TotalDataReadBandwidth + / (mode_lib->vba.AverageReadBandwidthGBytePerSecond + * 1000)); + mode_lib->vba.StutterBurstTime = mode_lib->vba.PartOfBurstThatFitsInROB + * (mode_lib->vba.AverageReadBandwidthGBytePerSecond * 1000) + / mode_lib->vba.TotalDataReadBandwidth / mode_lib->vba.ReturnBW + + (mode_lib->vba.MinFullDETBufferingTime + * mode_lib->vba.TotalDataReadBandwidth + - mode_lib->vba.PartOfBurstThatFitsInROB) + / (mode_lib->vba.DCFCLK * 64); + if (mode_lib->vba.TotalActiveWriteback == 0) { + mode_lib->vba.StutterEfficiencyNotIncludingVBlank = (1 + - (mode_lib->vba.SRExitTime + mode_lib->vba.StutterBurstTime) + / mode_lib->vba.MinFullDETBufferingTime) * 100; + } else { + mode_lib->vba.StutterEfficiencyNotIncludingVBlank = 0; + } + + mode_lib->vba.SmallestVBlank = 999999; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.SynchronizedVBlank || mode_lib->vba.NumberOfActivePlanes == 1) { + mode_lib->vba.VBlankTime = (double) (mode_lib->vba.VTotal[k] + - mode_lib->vba.VActive[k]) * mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]; + } else { + mode_lib->vba.VBlankTime = 0; + } + mode_lib->vba.SmallestVBlank = dml_min( + mode_lib->vba.SmallestVBlank, + mode_lib->vba.VBlankTime); + } + + mode_lib->vba.StutterEfficiency = (mode_lib->vba.StutterEfficiencyNotIncludingVBlank / 100 + * (mode_lib->vba.FrameTimeForMinFullDETBufferingTime + - mode_lib->vba.SmallestVBlank) + + mode_lib->vba.SmallestVBlank) + / mode_lib->vba.FrameTimeForMinFullDETBufferingTime * 100; + + // dml_ml->vba.DCFCLK Deep Sleep + mode_lib->vba.DCFCLKDeepSleep = 8.0; + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; k++) { + if (mode_lib->vba.BytePerPixelDETC[k] > 0) { + mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = + dml_max( + 1.1 * mode_lib->vba.SwathWidthY[k] + * dml_ceil( + mode_lib->vba.BytePerPixelDETY[k], + 1) / 32 + / mode_lib->vba.DisplayPipeLineDeliveryTimeLuma[k], + 1.1 * mode_lib->vba.SwathWidthY[k] / 2.0 + * dml_ceil( + mode_lib->vba.BytePerPixelDETC[k], + 2) / 32 + / mode_lib->vba.DisplayPipeLineDeliveryTimeChroma[k]); + } else + mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = 1.1 * mode_lib->vba.SwathWidthY[k] + * dml_ceil(mode_lib->vba.BytePerPixelDETY[k], 1) / 64.0 + / mode_lib->vba.DisplayPipeLineDeliveryTimeLuma[k]; + mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = dml_max( + mode_lib->vba.DCFCLKDeepSleepPerPlane[k], + mode_lib->vba.PixelClock[k] / 16.0); + mode_lib->vba.DCFCLKDeepSleep = dml_max( + mode_lib->vba.DCFCLKDeepSleep, + mode_lib->vba.DCFCLKDeepSleepPerPlane[k]); + + DTRACE( + " dcfclk_deepsleep_per_plane[%i] = %fMHz", + k, + mode_lib->vba.DCFCLKDeepSleepPerPlane[k]); + } + + DTRACE(" dcfclk_deepsleep_mhz = %fMHz", mode_lib->vba.DCFCLKDeepSleep); + + // Stutter Watermark + mode_lib->vba.StutterExitWatermark = mode_lib->vba.SRExitTime + + mode_lib->vba.LastPixelOfLineExtraWatermark + + mode_lib->vba.UrgentExtraLatency + 10 / mode_lib->vba.DCFCLKDeepSleep; + mode_lib->vba.StutterEnterPlusExitWatermark = mode_lib->vba.SREnterPlusExitTime + + mode_lib->vba.LastPixelOfLineExtraWatermark + + mode_lib->vba.UrgentExtraLatency; + + DTRACE(" wm_cstate_exit = %fus", mode_lib->vba.StutterExitWatermark); + DTRACE(" wm_cstate_enter_exit = %fus", mode_lib->vba.StutterEnterPlusExitWatermark); + + // Urgent Latency Supported + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + mode_lib->vba.EffectiveDETPlusLBLinesLuma = + dml_floor( + mode_lib->vba.LinesInDETY[k] + + dml_min( + mode_lib->vba.LinesInDETY[k] + * mode_lib->vba.DPPCLK[k] + * mode_lib->vba.BytePerPixelDETY[k] + * mode_lib->vba.PSCL_THROUGHPUT_LUMA[k] + / (mode_lib->vba.ReturnBW + / mode_lib->vba.DPPPerPlane[k]), + (double) mode_lib->vba.EffectiveLBLatencyHidingSourceLinesLuma), + mode_lib->vba.SwathHeightY[k]); + + mode_lib->vba.UrgentLatencySupportUsLuma = mode_lib->vba.EffectiveDETPlusLBLinesLuma + * (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) + / mode_lib->vba.VRatio[k] + - mode_lib->vba.EffectiveDETPlusLBLinesLuma + * mode_lib->vba.SwathWidthY[k] + * mode_lib->vba.BytePerPixelDETY[k] + / (mode_lib->vba.ReturnBW + / mode_lib->vba.DPPPerPlane[k]); + + if (mode_lib->vba.BytePerPixelDETC[k] > 0) { + mode_lib->vba.EffectiveDETPlusLBLinesChroma = + dml_floor( + mode_lib->vba.LinesInDETC[k] + + dml_min( + mode_lib->vba.LinesInDETC[k] + * mode_lib->vba.DPPCLK[k] + * mode_lib->vba.BytePerPixelDETC[k] + * mode_lib->vba.PSCL_THROUGHPUT_CHROMA[k] + / (mode_lib->vba.ReturnBW + / mode_lib->vba.DPPPerPlane[k]), + (double) mode_lib->vba.EffectiveLBLatencyHidingSourceLinesChroma), + mode_lib->vba.SwathHeightC[k]); + mode_lib->vba.UrgentLatencySupportUsChroma = + mode_lib->vba.EffectiveDETPlusLBLinesChroma + * (mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]) + / (mode_lib->vba.VRatio[k] / 2) + - mode_lib->vba.EffectiveDETPlusLBLinesChroma + * (mode_lib->vba.SwathWidthY[k] + / 2) + * mode_lib->vba.BytePerPixelDETC[k] + / (mode_lib->vba.ReturnBW + / mode_lib->vba.DPPPerPlane[k]); + mode_lib->vba.UrgentLatencySupportUs[k] = dml_min( + mode_lib->vba.UrgentLatencySupportUsLuma, + mode_lib->vba.UrgentLatencySupportUsChroma); + } else { + mode_lib->vba.UrgentLatencySupportUs[k] = + mode_lib->vba.UrgentLatencySupportUsLuma; + } + } + + mode_lib->vba.MinUrgentLatencySupportUs = 999999; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + mode_lib->vba.MinUrgentLatencySupportUs = dml_min( + mode_lib->vba.MinUrgentLatencySupportUs, + mode_lib->vba.UrgentLatencySupportUs[k]); + } + + // Non-Urgent Latency Tolerance + mode_lib->vba.NonUrgentLatencyTolerance = mode_lib->vba.MinUrgentLatencySupportUs + - mode_lib->vba.UrgentWatermark; + + // DSCCLK + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if ((mode_lib->vba.BlendingAndTiming[k] != k) || !mode_lib->vba.DSCEnabled[k]) { + mode_lib->vba.DSCCLK_calculated[k] = 0.0; + } else { + if (mode_lib->vba.OutputFormat[k] == dm_420 + || mode_lib->vba.OutputFormat[k] == dm_n422) + mode_lib->vba.DSCFormatFactor = 2; + else + mode_lib->vba.DSCFormatFactor = 1; + if (mode_lib->vba.ODMCombineEnabled[k]) + mode_lib->vba.DSCCLK_calculated[k] = + mode_lib->vba.PixelClockBackEnd[k] / 6 + / mode_lib->vba.DSCFormatFactor + / (1 + - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading + / 100); + else + mode_lib->vba.DSCCLK_calculated[k] = + mode_lib->vba.PixelClockBackEnd[k] / 3 + / mode_lib->vba.DSCFormatFactor + / (1 + - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading + / 100); + } + } + + // DSC Delay + // TODO + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + double bpp = mode_lib->vba.OutputBpp[k]; + unsigned int slices = mode_lib->vba.NumberOfDSCSlices[k]; + + if (mode_lib->vba.DSCEnabled[k] && bpp != 0) { + if (!mode_lib->vba.ODMCombineEnabled[k]) { + mode_lib->vba.DSCDelay[k] = + dscceComputeDelay( + mode_lib->vba.DSCInputBitPerComponent[k], + bpp, + dml_ceil( + (double) mode_lib->vba.HActive[k] + / mode_lib->vba.NumberOfDSCSlices[k], + 1), + slices, + mode_lib->vba.OutputFormat[k]) + + dscComputeDelay( + mode_lib->vba.OutputFormat[k]); + } else { + mode_lib->vba.DSCDelay[k] = + 2 + * (dscceComputeDelay( + mode_lib->vba.DSCInputBitPerComponent[k], + bpp, + dml_ceil( + (double) mode_lib->vba.HActive[k] + / mode_lib->vba.NumberOfDSCSlices[k], + 1), + slices / 2.0, + mode_lib->vba.OutputFormat[k]) + + dscComputeDelay( + mode_lib->vba.OutputFormat[k])); + } + mode_lib->vba.DSCDelay[k] = mode_lib->vba.DSCDelay[k] + * mode_lib->vba.PixelClock[k] + / mode_lib->vba.PixelClockBackEnd[k]; + } else { + mode_lib->vba.DSCDelay[k] = 0; + } + } + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) + for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) // NumberOfPlanes + if (j != k && mode_lib->vba.BlendingAndTiming[k] == j + && mode_lib->vba.DSCEnabled[j]) + mode_lib->vba.DSCDelay[k] = mode_lib->vba.DSCDelay[j]; + + // Prefetch + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + unsigned int PDEAndMetaPTEBytesFrameY; + unsigned int PixelPTEBytesPerRowY; + unsigned int MetaRowByteY; + unsigned int MetaRowByteC; + unsigned int PDEAndMetaPTEBytesFrameC; + unsigned int PixelPTEBytesPerRowC; + + Calculate256BBlockSizes( + mode_lib->vba.SourcePixelFormat[k], + mode_lib->vba.SurfaceTiling[k], + dml_ceil(mode_lib->vba.BytePerPixelDETY[k], 1), + dml_ceil(mode_lib->vba.BytePerPixelDETC[k], 2), + &mode_lib->vba.BlockHeight256BytesY[k], + &mode_lib->vba.BlockHeight256BytesC[k], + &mode_lib->vba.BlockWidth256BytesY[k], + &mode_lib->vba.BlockWidth256BytesC[k]); + PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes( + mode_lib, + mode_lib->vba.DCCEnable[k], + mode_lib->vba.BlockHeight256BytesY[k], + mode_lib->vba.BlockWidth256BytesY[k], + mode_lib->vba.SourcePixelFormat[k], + mode_lib->vba.SurfaceTiling[k], + dml_ceil(mode_lib->vba.BytePerPixelDETY[k], 1), + mode_lib->vba.SourceScan[k], + mode_lib->vba.ViewportWidth[k], + mode_lib->vba.ViewportHeight[k], + mode_lib->vba.SwathWidthY[k], + mode_lib->vba.GPUVMEnable, + mode_lib->vba.VMMPageSize, + mode_lib->vba.PTEBufferSizeInRequestsLuma, + mode_lib->vba.PDEProcessingBufIn64KBReqs, + mode_lib->vba.PitchY[k], + mode_lib->vba.DCCMetaPitchY[k], + &mode_lib->vba.MacroTileWidthY[k], + &MetaRowByteY, + &PixelPTEBytesPerRowY, + &mode_lib->vba.PTEBufferSizeNotExceeded[mode_lib->vba.VoltageLevel][0], + &mode_lib->vba.dpte_row_height[k], + &mode_lib->vba.meta_row_height[k]); + mode_lib->vba.PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines( + mode_lib, + mode_lib->vba.VRatio[k], + mode_lib->vba.vtaps[k], + mode_lib->vba.Interlace[k], + mode_lib->vba.ProgressiveToInterlaceUnitInOPP, + mode_lib->vba.SwathHeightY[k], + mode_lib->vba.ViewportYStartY[k], + &mode_lib->vba.VInitPreFillY[k], + &mode_lib->vba.MaxNumSwathY[k]); + + if ((mode_lib->vba.SourcePixelFormat[k] != dm_444_64 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_32 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_16 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_8)) { + PDEAndMetaPTEBytesFrameC = + CalculateVMAndRowBytes( + mode_lib, + mode_lib->vba.DCCEnable[k], + mode_lib->vba.BlockHeight256BytesC[k], + mode_lib->vba.BlockWidth256BytesC[k], + mode_lib->vba.SourcePixelFormat[k], + mode_lib->vba.SurfaceTiling[k], + dml_ceil( + mode_lib->vba.BytePerPixelDETC[k], + 2), + mode_lib->vba.SourceScan[k], + mode_lib->vba.ViewportWidth[k] / 2, + mode_lib->vba.ViewportHeight[k] / 2, + mode_lib->vba.SwathWidthY[k] / 2, + mode_lib->vba.GPUVMEnable, + mode_lib->vba.VMMPageSize, + mode_lib->vba.PTEBufferSizeInRequestsLuma, + mode_lib->vba.PDEProcessingBufIn64KBReqs, + mode_lib->vba.PitchC[k], + 0, + &mode_lib->vba.MacroTileWidthC[k], + &MetaRowByteC, + &PixelPTEBytesPerRowC, + &mode_lib->vba.PTEBufferSizeNotExceeded[mode_lib->vba.VoltageLevel][0], + &mode_lib->vba.dpte_row_height_chroma[k], + &mode_lib->vba.meta_row_height_chroma[k]); + mode_lib->vba.PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines( + mode_lib, + mode_lib->vba.VRatio[k] / 2, + mode_lib->vba.VTAPsChroma[k], + mode_lib->vba.Interlace[k], + mode_lib->vba.ProgressiveToInterlaceUnitInOPP, + mode_lib->vba.SwathHeightC[k], + mode_lib->vba.ViewportYStartC[k], + &mode_lib->vba.VInitPreFillC[k], + &mode_lib->vba.MaxNumSwathC[k]); + } else { + PixelPTEBytesPerRowC = 0; + PDEAndMetaPTEBytesFrameC = 0; + MetaRowByteC = 0; + mode_lib->vba.MaxNumSwathC[k] = 0; + mode_lib->vba.PrefetchSourceLinesC[k] = 0; + } + + mode_lib->vba.PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC; + mode_lib->vba.PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + + PDEAndMetaPTEBytesFrameC; + mode_lib->vba.MetaRowByte[k] = MetaRowByteY + MetaRowByteC; + + CalculateActiveRowBandwidth( + mode_lib->vba.GPUVMEnable, + mode_lib->vba.SourcePixelFormat[k], + mode_lib->vba.VRatio[k], + mode_lib->vba.DCCEnable[k], + mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k], + MetaRowByteY, + MetaRowByteC, + mode_lib->vba.meta_row_height[k], + mode_lib->vba.meta_row_height_chroma[k], + PixelPTEBytesPerRowY, + PixelPTEBytesPerRowC, + mode_lib->vba.dpte_row_height[k], + mode_lib->vba.dpte_row_height_chroma[k], + &mode_lib->vba.meta_row_bw[k], + &mode_lib->vba.dpte_row_bw[k], + &mode_lib->vba.qual_row_bw[k]); + } + + mode_lib->vba.TCalc = 24.0 / mode_lib->vba.DCFCLKDeepSleep; + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.BlendingAndTiming[k] == k) { + if (mode_lib->vba.WritebackEnable[k] == true) { + mode_lib->vba.WritebackDelay[mode_lib->vba.VoltageLevel][k] = + mode_lib->vba.WritebackLatency + + CalculateWriteBackDelay( + mode_lib->vba.WritebackPixelFormat[k], + mode_lib->vba.WritebackHRatio[k], + mode_lib->vba.WritebackVRatio[k], + mode_lib->vba.WritebackLumaHTaps[k], + mode_lib->vba.WritebackLumaVTaps[k], + mode_lib->vba.WritebackChromaHTaps[k], + mode_lib->vba.WritebackChromaVTaps[k], + mode_lib->vba.WritebackDestinationWidth[k]) + / mode_lib->vba.DISPCLK; + } else + mode_lib->vba.WritebackDelay[mode_lib->vba.VoltageLevel][k] = 0; + for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) { + if (mode_lib->vba.BlendingAndTiming[j] == k + && mode_lib->vba.WritebackEnable[j] == true) { + mode_lib->vba.WritebackDelay[mode_lib->vba.VoltageLevel][k] = + dml_max( + mode_lib->vba.WritebackDelay[mode_lib->vba.VoltageLevel][k], + mode_lib->vba.WritebackLatency + + CalculateWriteBackDelay( + mode_lib->vba.WritebackPixelFormat[j], + mode_lib->vba.WritebackHRatio[j], + mode_lib->vba.WritebackVRatio[j], + mode_lib->vba.WritebackLumaHTaps[j], + mode_lib->vba.WritebackLumaVTaps[j], + mode_lib->vba.WritebackChromaHTaps[j], + mode_lib->vba.WritebackChromaVTaps[j], + mode_lib->vba.WritebackDestinationWidth[j]) + / mode_lib->vba.DISPCLK); + } + } + } + } + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) + for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) + if (mode_lib->vba.BlendingAndTiming[k] == j) + mode_lib->vba.WritebackDelay[mode_lib->vba.VoltageLevel][k] = + mode_lib->vba.WritebackDelay[mode_lib->vba.VoltageLevel][j]; + + mode_lib->vba.VStartupLines = 13; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + mode_lib->vba.MaxVStartupLines[k] = + mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k] + - dml_max( + 1.0, + dml_ceil( + mode_lib->vba.WritebackDelay[mode_lib->vba.VoltageLevel][k] + / (mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]), + 1)); + } + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) + mode_lib->vba.MaximumMaxVStartupLines = dml_max( + mode_lib->vba.MaximumMaxVStartupLines, + mode_lib->vba.MaxVStartupLines[k]); + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + mode_lib->vba.cursor_bw[k] = 0.0; + for (j = 0; j < mode_lib->vba.NumberOfCursors[k]; ++j) + mode_lib->vba.cursor_bw[k] += mode_lib->vba.CursorWidth[k][j] + * mode_lib->vba.CursorBPP[k][j] / 8.0 + / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) + * mode_lib->vba.VRatio[k]; + } + + do { + double MaxTotalRDBandwidth = 0; + bool DestinationLineTimesForPrefetchLessThan2 = false; + bool VRatioPrefetchMoreThan4 = false; + bool prefetch_vm_bw_valid = true; + bool prefetch_row_bw_valid = true; + double TWait = CalculateTWait( + mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb], + mode_lib->vba.DRAMClockChangeLatency, + mode_lib->vba.UrgentLatencyPixelDataOnly, + mode_lib->vba.SREnterPlusExitTime); + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.XFCEnabled[k] == true) { + mode_lib->vba.XFCRemoteSurfaceFlipDelay = + CalculateRemoteSurfaceFlipDelay( + mode_lib, + mode_lib->vba.VRatio[k], + mode_lib->vba.SwathWidthY[k], + dml_ceil( + mode_lib->vba.BytePerPixelDETY[k], + 1), + mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k], + mode_lib->vba.XFCTSlvVupdateOffset, + mode_lib->vba.XFCTSlvVupdateWidth, + mode_lib->vba.XFCTSlvVreadyOffset, + mode_lib->vba.XFCXBUFLatencyTolerance, + mode_lib->vba.XFCFillBWOverhead, + mode_lib->vba.XFCSlvChunkSize, + mode_lib->vba.XFCBusTransportTime, + mode_lib->vba.TCalc, + TWait, + &mode_lib->vba.SrcActiveDrainRate, + &mode_lib->vba.TInitXFill, + &mode_lib->vba.TslvChk); + } else { + mode_lib->vba.XFCRemoteSurfaceFlipDelay = 0; + } + mode_lib->vba.ErrorResult[k] = + CalculatePrefetchSchedule( + mode_lib, + mode_lib->vba.DPPCLK[k], + mode_lib->vba.DISPCLK, + mode_lib->vba.PixelClock[k], + mode_lib->vba.DCFCLKDeepSleep, + mode_lib->vba.DSCDelay[k], + mode_lib->vba.DPPPerPlane[k], + mode_lib->vba.ScalerEnabled[k], + mode_lib->vba.NumberOfCursors[k], + mode_lib->vba.DPPCLKDelaySubtotal, + mode_lib->vba.DPPCLKDelaySCL, + mode_lib->vba.DPPCLKDelaySCLLBOnly, + mode_lib->vba.DPPCLKDelayCNVCFormater, + mode_lib->vba.DPPCLKDelayCNVCCursor, + mode_lib->vba.DISPCLKDelaySubtotal, + (unsigned int) (mode_lib->vba.SwathWidthY[k] + / mode_lib->vba.HRatio[k]), + mode_lib->vba.OutputFormat[k], + mode_lib->vba.VTotal[k] + - mode_lib->vba.VActive[k], + mode_lib->vba.HTotal[k], + mode_lib->vba.MaxInterDCNTileRepeaters, + dml_min( + mode_lib->vba.VStartupLines, + mode_lib->vba.MaxVStartupLines[k]), + mode_lib->vba.GPUVMMaxPageTableLevels, + mode_lib->vba.GPUVMEnable, + mode_lib->vba.DynamicMetadataEnable[k], + mode_lib->vba.DynamicMetadataLinesBeforeActiveRequired[k], + mode_lib->vba.DynamicMetadataTransmittedBytes[k], + mode_lib->vba.DCCEnable[k], + mode_lib->vba.UrgentLatencyPixelDataOnly, + mode_lib->vba.UrgentExtraLatency, + mode_lib->vba.TCalc, + mode_lib->vba.PDEAndMetaPTEBytesFrame[k], + mode_lib->vba.MetaRowByte[k], + mode_lib->vba.PixelPTEBytesPerRow[k], + mode_lib->vba.PrefetchSourceLinesY[k], + mode_lib->vba.SwathWidthY[k], + mode_lib->vba.BytePerPixelDETY[k], + mode_lib->vba.VInitPreFillY[k], + mode_lib->vba.MaxNumSwathY[k], + mode_lib->vba.PrefetchSourceLinesC[k], + mode_lib->vba.BytePerPixelDETC[k], + mode_lib->vba.VInitPreFillC[k], + mode_lib->vba.MaxNumSwathC[k], + mode_lib->vba.SwathHeightY[k], + mode_lib->vba.SwathHeightC[k], + TWait, + mode_lib->vba.XFCEnabled[k], + mode_lib->vba.XFCRemoteSurfaceFlipDelay, + mode_lib->vba.Interlace[k], + mode_lib->vba.ProgressiveToInterlaceUnitInOPP, + &mode_lib->vba.DSTXAfterScaler[k], + &mode_lib->vba.DSTYAfterScaler[k], + &mode_lib->vba.DestinationLinesForPrefetch[k], + &mode_lib->vba.PrefetchBandwidth[k], + &mode_lib->vba.DestinationLinesToRequestVMInVBlank[k], + &mode_lib->vba.DestinationLinesToRequestRowInVBlank[k], + &mode_lib->vba.VRatioPrefetchY[k], + &mode_lib->vba.VRatioPrefetchC[k], + &mode_lib->vba.RequiredPrefetchPixDataBWLuma[k], + &mode_lib->vba.VStartupRequiredWhenNotEnoughTimeForDynamicMetadata, + &mode_lib->vba.Tno_bw[k], + &mode_lib->vba.VUpdateOffsetPix[k], + &mode_lib->vba.VUpdateWidthPix[k], + &mode_lib->vba.VReadyOffsetPix[k]); + if (mode_lib->vba.BlendingAndTiming[k] == k) { + mode_lib->vba.VStartup[k] = dml_min( + mode_lib->vba.VStartupLines, + mode_lib->vba.MaxVStartupLines[k]); + if (mode_lib->vba.VStartupRequiredWhenNotEnoughTimeForDynamicMetadata + != 0) { + mode_lib->vba.VStartup[k] = + mode_lib->vba.VStartupRequiredWhenNotEnoughTimeForDynamicMetadata; + } + } else { + mode_lib->vba.VStartup[k] = + dml_min( + mode_lib->vba.VStartupLines, + mode_lib->vba.MaxVStartupLines[mode_lib->vba.BlendingAndTiming[k]]); + } + } + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + + if (mode_lib->vba.PDEAndMetaPTEBytesFrame[k] == 0) + mode_lib->vba.prefetch_vm_bw[k] = 0; + else if (mode_lib->vba.DestinationLinesToRequestVMInVBlank[k] > 0) { + mode_lib->vba.prefetch_vm_bw[k] = + (double) mode_lib->vba.PDEAndMetaPTEBytesFrame[k] + / (mode_lib->vba.DestinationLinesToRequestVMInVBlank[k] + * mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]); + } else { + mode_lib->vba.prefetch_vm_bw[k] = 0; + prefetch_vm_bw_valid = false; + } + if (mode_lib->vba.MetaRowByte[k] + mode_lib->vba.PixelPTEBytesPerRow[k] + == 0) + mode_lib->vba.prefetch_row_bw[k] = 0; + else if (mode_lib->vba.DestinationLinesToRequestRowInVBlank[k] > 0) { + mode_lib->vba.prefetch_row_bw[k] = + (double) (mode_lib->vba.MetaRowByte[k] + + mode_lib->vba.PixelPTEBytesPerRow[k]) + / (mode_lib->vba.DestinationLinesToRequestRowInVBlank[k] + * mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]); + } else { + mode_lib->vba.prefetch_row_bw[k] = 0; + prefetch_row_bw_valid = false; + } + + MaxTotalRDBandwidth = + MaxTotalRDBandwidth + mode_lib->vba.cursor_bw[k] + + dml_max( + mode_lib->vba.prefetch_vm_bw[k], + dml_max( + mode_lib->vba.prefetch_row_bw[k], + dml_max( + mode_lib->vba.ReadBandwidthPlaneLuma[k] + + mode_lib->vba.ReadBandwidthPlaneChroma[k], + mode_lib->vba.RequiredPrefetchPixDataBWLuma[k]) + + mode_lib->vba.meta_row_bw[k] + + mode_lib->vba.dpte_row_bw[k])); + + if (mode_lib->vba.DestinationLinesForPrefetch[k] < 2) + DestinationLineTimesForPrefetchLessThan2 = true; + if (mode_lib->vba.VRatioPrefetchY[k] > 4 + || mode_lib->vba.VRatioPrefetchC[k] > 4) + VRatioPrefetchMoreThan4 = true; + } + + if (MaxTotalRDBandwidth <= mode_lib->vba.ReturnBW && prefetch_vm_bw_valid + && prefetch_row_bw_valid && !VRatioPrefetchMoreThan4 + && !DestinationLineTimesForPrefetchLessThan2) + mode_lib->vba.PrefetchModeSupported = true; + else { + mode_lib->vba.PrefetchModeSupported = false; + dml_print( + "DML: CalculatePrefetchSchedule ***failed***. Bandwidth violation. Results are NOT valid\n"); + } + + if (mode_lib->vba.PrefetchModeSupported == true) { + double final_flip_bw[DC__NUM_DPP__MAX]; + unsigned int ImmediateFlipBytes[DC__NUM_DPP__MAX]; + double total_dcn_read_bw_with_flip = 0; + + mode_lib->vba.BandwidthAvailableForImmediateFlip = mode_lib->vba.ReturnBW; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + mode_lib->vba.BandwidthAvailableForImmediateFlip = + mode_lib->vba.BandwidthAvailableForImmediateFlip + - mode_lib->vba.cursor_bw[k] + - dml_max( + mode_lib->vba.ReadBandwidthPlaneLuma[k] + + mode_lib->vba.ReadBandwidthPlaneChroma[k] + + mode_lib->vba.qual_row_bw[k], + mode_lib->vba.PrefetchBandwidth[k]); + } + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + ImmediateFlipBytes[k] = 0; + if ((mode_lib->vba.SourcePixelFormat[k] != dm_420_8 + && mode_lib->vba.SourcePixelFormat[k] != dm_420_10)) { + ImmediateFlipBytes[k] = + mode_lib->vba.PDEAndMetaPTEBytesFrame[k] + + mode_lib->vba.MetaRowByte[k] + + mode_lib->vba.PixelPTEBytesPerRow[k]; + } + } + mode_lib->vba.TotImmediateFlipBytes = 0; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if ((mode_lib->vba.SourcePixelFormat[k] != dm_420_8 + && mode_lib->vba.SourcePixelFormat[k] != dm_420_10)) { + mode_lib->vba.TotImmediateFlipBytes = + mode_lib->vba.TotImmediateFlipBytes + + ImmediateFlipBytes[k]; + } + } + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + CalculateFlipSchedule( + mode_lib, + mode_lib->vba.UrgentExtraLatency, + mode_lib->vba.UrgentLatencyPixelDataOnly, + mode_lib->vba.GPUVMMaxPageTableLevels, + mode_lib->vba.GPUVMEnable, + mode_lib->vba.BandwidthAvailableForImmediateFlip, + mode_lib->vba.TotImmediateFlipBytes, + mode_lib->vba.SourcePixelFormat[k], + ImmediateFlipBytes[k], + mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k], + mode_lib->vba.VRatio[k], + mode_lib->vba.Tno_bw[k], + mode_lib->vba.PDEAndMetaPTEBytesFrame[k], + mode_lib->vba.MetaRowByte[k], + mode_lib->vba.PixelPTEBytesPerRow[k], + mode_lib->vba.DCCEnable[k], + mode_lib->vba.dpte_row_height[k], + mode_lib->vba.meta_row_height[k], + mode_lib->vba.qual_row_bw[k], + &mode_lib->vba.DestinationLinesToRequestVMInImmediateFlip[k], + &mode_lib->vba.DestinationLinesToRequestRowInImmediateFlip[k], + &final_flip_bw[k], + &mode_lib->vba.ImmediateFlipSupportedForPipe[k]); + } + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + total_dcn_read_bw_with_flip = + total_dcn_read_bw_with_flip + + mode_lib->vba.cursor_bw[k] + + dml_max( + mode_lib->vba.prefetch_vm_bw[k], + dml_max( + mode_lib->vba.prefetch_row_bw[k], + final_flip_bw[k] + + dml_max( + mode_lib->vba.ReadBandwidthPlaneLuma[k] + + mode_lib->vba.ReadBandwidthPlaneChroma[k], + mode_lib->vba.RequiredPrefetchPixDataBWLuma[k]))); + } + mode_lib->vba.ImmediateFlipSupported = true; + if (total_dcn_read_bw_with_flip > mode_lib->vba.ReturnBW) { + mode_lib->vba.ImmediateFlipSupported = false; + } + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.ImmediateFlipSupportedForPipe[k] == false) { + mode_lib->vba.ImmediateFlipSupported = false; + } + } + } else { + mode_lib->vba.ImmediateFlipSupported = false; + } + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.ErrorResult[k]) { + mode_lib->vba.PrefetchModeSupported = false; + dml_print( + "DML: CalculatePrefetchSchedule ***failed***. Prefetch schedule violation. Results are NOT valid\n"); + } + } + + mode_lib->vba.VStartupLines = mode_lib->vba.VStartupLines + 1; + } while (!((mode_lib->vba.PrefetchModeSupported + && (!mode_lib->vba.ImmediateFlipSupport + || mode_lib->vba.ImmediateFlipSupported)) + || mode_lib->vba.MaximumMaxVStartupLines < mode_lib->vba.VStartupLines)); + + //Display Pipeline Delivery Time in Prefetch + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.VRatioPrefetchY[k] <= 1) { + mode_lib->vba.DisplayPipeLineDeliveryTimeLumaPrefetch[k] = + mode_lib->vba.SwathWidthY[k] * mode_lib->vba.DPPPerPlane[k] + / mode_lib->vba.HRatio[k] + / mode_lib->vba.PixelClock[k]; + } else { + mode_lib->vba.DisplayPipeLineDeliveryTimeLumaPrefetch[k] = + mode_lib->vba.SwathWidthY[k] + / mode_lib->vba.PSCL_THROUGHPUT_LUMA[k] + / mode_lib->vba.DPPCLK[k]; + } + if (mode_lib->vba.BytePerPixelDETC[k] == 0) { + mode_lib->vba.DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0; + } else { + if (mode_lib->vba.VRatioPrefetchC[k] <= 1) { + mode_lib->vba.DisplayPipeLineDeliveryTimeChromaPrefetch[k] = + mode_lib->vba.SwathWidthY[k] + * mode_lib->vba.DPPPerPlane[k] + / mode_lib->vba.HRatio[k] + / mode_lib->vba.PixelClock[k]; + } else { + mode_lib->vba.DisplayPipeLineDeliveryTimeChromaPrefetch[k] = + mode_lib->vba.SwathWidthY[k] + / mode_lib->vba.PSCL_THROUGHPUT_LUMA[k] + / mode_lib->vba.DPPCLK[k]; + } + } + } + + // Min TTUVBlank + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb] == 0) { + mode_lib->vba.AllowDRAMClockChangeDuringVBlank[k] = true; + mode_lib->vba.AllowDRAMSelfRefreshDuringVBlank[k] = true; + mode_lib->vba.MinTTUVBlank[k] = dml_max( + mode_lib->vba.DRAMClockChangeWatermark, + dml_max( + mode_lib->vba.StutterEnterPlusExitWatermark, + mode_lib->vba.UrgentWatermark)); + } else if (mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb] == 1) { + mode_lib->vba.AllowDRAMClockChangeDuringVBlank[k] = false; + mode_lib->vba.AllowDRAMSelfRefreshDuringVBlank[k] = true; + mode_lib->vba.MinTTUVBlank[k] = dml_max( + mode_lib->vba.StutterEnterPlusExitWatermark, + mode_lib->vba.UrgentWatermark); + } else { + mode_lib->vba.AllowDRAMClockChangeDuringVBlank[k] = false; + mode_lib->vba.AllowDRAMSelfRefreshDuringVBlank[k] = false; + mode_lib->vba.MinTTUVBlank[k] = mode_lib->vba.UrgentWatermark; + } + if (!mode_lib->vba.DynamicMetadataEnable[k]) + mode_lib->vba.MinTTUVBlank[k] = mode_lib->vba.TCalc + + mode_lib->vba.MinTTUVBlank[k]; + } + + // DCC Configuration + mode_lib->vba.ActiveDPPs = 0; + // NB P-State/DRAM Clock Change Support + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + mode_lib->vba.ActiveDPPs = mode_lib->vba.ActiveDPPs + mode_lib->vba.DPPPerPlane[k]; + } + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + double EffectiveLBLatencyHidingY; + double EffectiveLBLatencyHidingC; + double DPPOutputBufferLinesY; + double DPPOutputBufferLinesC; + double DPPOPPBufferingY; + double MaxDETBufferingTimeY; + double ActiveDRAMClockChangeLatencyMarginY; + + mode_lib->vba.LBLatencyHidingSourceLinesY = + dml_min( + mode_lib->vba.MaxLineBufferLines, + (unsigned int) dml_floor( + (double) mode_lib->vba.LineBufferSize + / mode_lib->vba.LBBitPerPixel[k] + / (mode_lib->vba.SwathWidthY[k] + / dml_max( + mode_lib->vba.HRatio[k], + 1.0)), + 1)) - (mode_lib->vba.vtaps[k] - 1); + + mode_lib->vba.LBLatencyHidingSourceLinesC = + dml_min( + mode_lib->vba.MaxLineBufferLines, + (unsigned int) dml_floor( + (double) mode_lib->vba.LineBufferSize + / mode_lib->vba.LBBitPerPixel[k] + / (mode_lib->vba.SwathWidthY[k] + / 2.0 + / dml_max( + mode_lib->vba.HRatio[k] + / 2, + 1.0)), + 1)) + - (mode_lib->vba.VTAPsChroma[k] - 1); + + EffectiveLBLatencyHidingY = mode_lib->vba.LBLatencyHidingSourceLinesY + / mode_lib->vba.VRatio[k] + * (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]); + + EffectiveLBLatencyHidingC = mode_lib->vba.LBLatencyHidingSourceLinesC + / (mode_lib->vba.VRatio[k] / 2) + * (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]); + + if (mode_lib->vba.SwathWidthY[k] > 2 * mode_lib->vba.DPPOutputBufferPixels) { + DPPOutputBufferLinesY = mode_lib->vba.DPPOutputBufferPixels + / mode_lib->vba.SwathWidthY[k]; + } else if (mode_lib->vba.SwathWidthY[k] > mode_lib->vba.DPPOutputBufferPixels) { + DPPOutputBufferLinesY = 0.5; + } else { + DPPOutputBufferLinesY = 1; + } + + if (mode_lib->vba.SwathWidthY[k] / 2 > 2 * mode_lib->vba.DPPOutputBufferPixels) { + DPPOutputBufferLinesC = mode_lib->vba.DPPOutputBufferPixels + / (mode_lib->vba.SwathWidthY[k] / 2); + } else if (mode_lib->vba.SwathWidthY[k] / 2 > mode_lib->vba.DPPOutputBufferPixels) { + DPPOutputBufferLinesC = 0.5; + } else { + DPPOutputBufferLinesC = 1; + } + + DPPOPPBufferingY = (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) + * (DPPOutputBufferLinesY + mode_lib->vba.OPPOutputBufferLines); + MaxDETBufferingTimeY = mode_lib->vba.FullDETBufferingTimeY[k] + + (mode_lib->vba.LinesInDETY[k] + - mode_lib->vba.LinesInDETYRoundedDownToSwath[k]) + / mode_lib->vba.SwathHeightY[k] + * (mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]); + + ActiveDRAMClockChangeLatencyMarginY = DPPOPPBufferingY + EffectiveLBLatencyHidingY + + MaxDETBufferingTimeY - mode_lib->vba.DRAMClockChangeWatermark; + + if (mode_lib->vba.ActiveDPPs > 1) { + ActiveDRAMClockChangeLatencyMarginY = + ActiveDRAMClockChangeLatencyMarginY + - (1 - 1 / (mode_lib->vba.ActiveDPPs - 1)) + * mode_lib->vba.SwathHeightY[k] + * (mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]); + } + + if (mode_lib->vba.BytePerPixelDETC[k] > 0) { + double DPPOPPBufferingC = (mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]) + * (DPPOutputBufferLinesC + + mode_lib->vba.OPPOutputBufferLines); + double MaxDETBufferingTimeC = + mode_lib->vba.FullDETBufferingTimeC[k] + + (mode_lib->vba.LinesInDETC[k] + - mode_lib->vba.LinesInDETCRoundedDownToSwath[k]) + / mode_lib->vba.SwathHeightC[k] + * (mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]); + double ActiveDRAMClockChangeLatencyMarginC = DPPOPPBufferingC + + EffectiveLBLatencyHidingC + MaxDETBufferingTimeC + - mode_lib->vba.DRAMClockChangeWatermark; + + if (mode_lib->vba.ActiveDPPs > 1) { + ActiveDRAMClockChangeLatencyMarginC = + ActiveDRAMClockChangeLatencyMarginC + - (1 + - 1 + / (mode_lib->vba.ActiveDPPs + - 1)) + * mode_lib->vba.SwathHeightC[k] + * (mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]); + } + mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = dml_min( + ActiveDRAMClockChangeLatencyMarginY, + ActiveDRAMClockChangeLatencyMarginC); + } else { + mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = + ActiveDRAMClockChangeLatencyMarginY; + } + + if (mode_lib->vba.WritebackEnable[k]) { + double WritebackDRAMClockChangeLatencyMargin; + + if (mode_lib->vba.WritebackPixelFormat[k] == dm_444_32) { + WritebackDRAMClockChangeLatencyMargin = + (double) (mode_lib->vba.WritebackInterfaceLumaBufferSize + + mode_lib->vba.WritebackInterfaceChromaBufferSize) + / (mode_lib->vba.WritebackDestinationWidth[k] + * mode_lib->vba.WritebackDestinationHeight[k] + / (mode_lib->vba.WritebackSourceHeight[k] + * mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]) + * 4) + - mode_lib->vba.WritebackDRAMClockChangeWatermark; + } else if (mode_lib->vba.WritebackPixelFormat[k] == dm_420_10) { + WritebackDRAMClockChangeLatencyMargin = + dml_min( + (double) mode_lib->vba.WritebackInterfaceLumaBufferSize + * 8.0 / 10, + 2.0 + * mode_lib->vba.WritebackInterfaceChromaBufferSize + * 8 / 10) + / (mode_lib->vba.WritebackDestinationWidth[k] + * mode_lib->vba.WritebackDestinationHeight[k] + / (mode_lib->vba.WritebackSourceHeight[k] + * mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k])) + - mode_lib->vba.WritebackDRAMClockChangeWatermark; + } else { + WritebackDRAMClockChangeLatencyMargin = + dml_min( + (double) mode_lib->vba.WritebackInterfaceLumaBufferSize, + 2.0 + * mode_lib->vba.WritebackInterfaceChromaBufferSize) + / (mode_lib->vba.WritebackDestinationWidth[k] + * mode_lib->vba.WritebackDestinationHeight[k] + / (mode_lib->vba.WritebackSourceHeight[k] + * mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k])) + - mode_lib->vba.WritebackDRAMClockChangeWatermark; + } + mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = dml_min( + mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k], + WritebackDRAMClockChangeLatencyMargin); + } + } + + mode_lib->vba.MinActiveDRAMClockChangeMargin = 999999; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] + < mode_lib->vba.MinActiveDRAMClockChangeMargin) { + mode_lib->vba.MinActiveDRAMClockChangeMargin = + mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k]; + } + } + + mode_lib->vba.MinActiveDRAMClockChangeLatencySupported = + mode_lib->vba.MinActiveDRAMClockChangeMargin + + mode_lib->vba.DRAMClockChangeLatency; + + if (mode_lib->vba.DRAMClockChangeSupportsVActive && + mode_lib->vba.MinActiveDRAMClockChangeMargin > 60) { + mode_lib->vba.DRAMClockChangeWatermark += 25; + mode_lib->vba.DRAMClockChangeSupport[0][0] = dm_dram_clock_change_vactive; + } else { + if (mode_lib->vba.SynchronizedVBlank || mode_lib->vba.NumberOfActivePlanes == 1) { + mode_lib->vba.DRAMClockChangeSupport[0][0] = dm_dram_clock_change_vblank; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (!mode_lib->vba.AllowDRAMClockChangeDuringVBlank[k]) { + mode_lib->vba.DRAMClockChangeSupport[0][0] = + dm_dram_clock_change_unsupported; + } + } + } else { + mode_lib->vba.DRAMClockChangeSupport[0][0] = dm_dram_clock_change_unsupported; + } + } + for (k = 0; k <= mode_lib->vba.soc.num_states; k++) + for (j = 0; j < 2; j++) + mode_lib->vba.DRAMClockChangeSupport[k][j] = mode_lib->vba.DRAMClockChangeSupport[0][0]; + + //XFC Parameters: + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.XFCEnabled[k] == true) { + double TWait; + + mode_lib->vba.XFCSlaveVUpdateOffset[k] = mode_lib->vba.XFCTSlvVupdateOffset; + mode_lib->vba.XFCSlaveVupdateWidth[k] = mode_lib->vba.XFCTSlvVupdateWidth; + mode_lib->vba.XFCSlaveVReadyOffset[k] = mode_lib->vba.XFCTSlvVreadyOffset; + TWait = CalculateTWait( + mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb], + mode_lib->vba.DRAMClockChangeLatency, + mode_lib->vba.UrgentLatencyPixelDataOnly, + mode_lib->vba.SREnterPlusExitTime); + mode_lib->vba.XFCRemoteSurfaceFlipDelay = CalculateRemoteSurfaceFlipDelay( + mode_lib, + mode_lib->vba.VRatio[k], + mode_lib->vba.SwathWidthY[k], + dml_ceil(mode_lib->vba.BytePerPixelDETY[k], 1), + mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k], + mode_lib->vba.XFCTSlvVupdateOffset, + mode_lib->vba.XFCTSlvVupdateWidth, + mode_lib->vba.XFCTSlvVreadyOffset, + mode_lib->vba.XFCXBUFLatencyTolerance, + mode_lib->vba.XFCFillBWOverhead, + mode_lib->vba.XFCSlvChunkSize, + mode_lib->vba.XFCBusTransportTime, + mode_lib->vba.TCalc, + TWait, + &mode_lib->vba.SrcActiveDrainRate, + &mode_lib->vba.TInitXFill, + &mode_lib->vba.TslvChk); + mode_lib->vba.XFCRemoteSurfaceFlipLatency[k] = + dml_floor( + mode_lib->vba.XFCRemoteSurfaceFlipDelay + / (mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]), + 1); + mode_lib->vba.XFCTransferDelay[k] = + dml_ceil( + mode_lib->vba.XFCBusTransportTime + / (mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]), + 1); + mode_lib->vba.XFCPrechargeDelay[k] = + dml_ceil( + (mode_lib->vba.XFCBusTransportTime + + mode_lib->vba.TInitXFill + + mode_lib->vba.TslvChk) + / (mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]), + 1); + mode_lib->vba.InitFillLevel = mode_lib->vba.XFCXBUFLatencyTolerance + * mode_lib->vba.SrcActiveDrainRate; + mode_lib->vba.FinalFillMargin = + (mode_lib->vba.DestinationLinesToRequestVMInVBlank[k] + + mode_lib->vba.DestinationLinesToRequestRowInVBlank[k]) + * mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k] + * mode_lib->vba.SrcActiveDrainRate + + mode_lib->vba.XFCFillConstant; + mode_lib->vba.FinalFillLevel = mode_lib->vba.XFCRemoteSurfaceFlipDelay + * mode_lib->vba.SrcActiveDrainRate + + mode_lib->vba.FinalFillMargin; + mode_lib->vba.RemainingFillLevel = dml_max( + 0.0, + mode_lib->vba.FinalFillLevel - mode_lib->vba.InitFillLevel); + mode_lib->vba.TFinalxFill = mode_lib->vba.RemainingFillLevel + / (mode_lib->vba.SrcActiveDrainRate + * mode_lib->vba.XFCFillBWOverhead / 100); + mode_lib->vba.XFCPrefetchMargin[k] = + mode_lib->vba.XFCRemoteSurfaceFlipDelay + + mode_lib->vba.TFinalxFill + + (mode_lib->vba.DestinationLinesToRequestVMInVBlank[k] + + mode_lib->vba.DestinationLinesToRequestRowInVBlank[k]) + * mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]; + } else { + mode_lib->vba.XFCSlaveVUpdateOffset[k] = 0; + mode_lib->vba.XFCSlaveVupdateWidth[k] = 0; + mode_lib->vba.XFCSlaveVReadyOffset[k] = 0; + mode_lib->vba.XFCRemoteSurfaceFlipLatency[k] = 0; + mode_lib->vba.XFCPrechargeDelay[k] = 0; + mode_lib->vba.XFCTransferDelay[k] = 0; + mode_lib->vba.XFCPrefetchMargin[k] = 0; + } + } + { + unsigned int VStartupMargin = 0; + bool FirstMainPlane = true; + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.BlendingAndTiming[k] == k) { + unsigned int Margin = (mode_lib->vba.MaxVStartupLines[k] - mode_lib->vba.VStartup[k]) + * mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]; + + if (FirstMainPlane) { + VStartupMargin = Margin; + FirstMainPlane = false; + } else + VStartupMargin = dml_min(VStartupMargin, Margin); + } + + if (mode_lib->vba.UseMaximumVStartup) { + if (mode_lib->vba.VTotal_Max[k] == mode_lib->vba.VTotal[k]) { + //only use max vstart if it is not drr or lateflip. + mode_lib->vba.VStartup[k] = mode_lib->vba.MaxVStartupLines[mode_lib->vba.BlendingAndTiming[k]]; + } + } + } +} +} + +static void dml20_DisplayPipeConfiguration(struct display_mode_lib *mode_lib) +{ + double BytePerPixDETY; + double BytePerPixDETC; + double Read256BytesBlockHeightY; + double Read256BytesBlockHeightC; + double Read256BytesBlockWidthY; + double Read256BytesBlockWidthC; + double MaximumSwathHeightY; + double MaximumSwathHeightC; + double MinimumSwathHeightY; + double MinimumSwathHeightC; + double SwathWidth; + double SwathWidthGranularityY; + double SwathWidthGranularityC; + double RoundedUpMaxSwathSizeBytesY; + double RoundedUpMaxSwathSizeBytesC; + unsigned int j, k; + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + bool MainPlaneDoesODMCombine = false; + + if (mode_lib->vba.SourcePixelFormat[k] == dm_444_64) { + BytePerPixDETY = 8; + BytePerPixDETC = 0; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_32) { + BytePerPixDETY = 4; + BytePerPixDETC = 0; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_16) { + BytePerPixDETY = 2; + BytePerPixDETC = 0; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_8) { + BytePerPixDETY = 1; + BytePerPixDETC = 0; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8) { + BytePerPixDETY = 1; + BytePerPixDETC = 2; + } else { + BytePerPixDETY = 4.0 / 3.0; + BytePerPixDETC = 8.0 / 3.0; + } + + if ((mode_lib->vba.SourcePixelFormat[k] == dm_444_64 + || mode_lib->vba.SourcePixelFormat[k] == dm_444_32 + || mode_lib->vba.SourcePixelFormat[k] == dm_444_16 + || mode_lib->vba.SourcePixelFormat[k] == dm_444_8)) { + if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) { + Read256BytesBlockHeightY = 1; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_64) { + Read256BytesBlockHeightY = 4; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_32 + || mode_lib->vba.SourcePixelFormat[k] == dm_444_16) { + Read256BytesBlockHeightY = 8; + } else { + Read256BytesBlockHeightY = 16; + } + Read256BytesBlockWidthY = 256 / dml_ceil(BytePerPixDETY, 1) + / Read256BytesBlockHeightY; + Read256BytesBlockHeightC = 0; + Read256BytesBlockWidthC = 0; + } else { + if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) { + Read256BytesBlockHeightY = 1; + Read256BytesBlockHeightC = 1; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8) { + Read256BytesBlockHeightY = 16; + Read256BytesBlockHeightC = 8; + } else { + Read256BytesBlockHeightY = 8; + Read256BytesBlockHeightC = 8; + } + Read256BytesBlockWidthY = 256 / dml_ceil(BytePerPixDETY, 1) + / Read256BytesBlockHeightY; + Read256BytesBlockWidthC = 256 / dml_ceil(BytePerPixDETC, 2) + / Read256BytesBlockHeightC; + } + + if (mode_lib->vba.SourceScan[k] == dm_horz) { + MaximumSwathHeightY = Read256BytesBlockHeightY; + MaximumSwathHeightC = Read256BytesBlockHeightC; + } else { + MaximumSwathHeightY = Read256BytesBlockWidthY; + MaximumSwathHeightC = Read256BytesBlockWidthC; + } + + if ((mode_lib->vba.SourcePixelFormat[k] == dm_444_64 + || mode_lib->vba.SourcePixelFormat[k] == dm_444_32 + || mode_lib->vba.SourcePixelFormat[k] == dm_444_16 + || mode_lib->vba.SourcePixelFormat[k] == dm_444_8)) { + if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear + || (mode_lib->vba.SourcePixelFormat[k] == dm_444_64 + && (mode_lib->vba.SurfaceTiling[k] + == dm_sw_4kb_s + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_4kb_s_x + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_64kb_s + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_64kb_s_t + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_64kb_s_x + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_var_s + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_var_s_x) + && mode_lib->vba.SourceScan[k] == dm_horz)) { + MinimumSwathHeightY = MaximumSwathHeightY; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_8 + && mode_lib->vba.SourceScan[k] != dm_horz) { + MinimumSwathHeightY = MaximumSwathHeightY; + } else { + MinimumSwathHeightY = MaximumSwathHeightY / 2.0; + } + MinimumSwathHeightC = MaximumSwathHeightC; + } else { + if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) { + MinimumSwathHeightY = MaximumSwathHeightY; + MinimumSwathHeightC = MaximumSwathHeightC; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8 + && mode_lib->vba.SourceScan[k] == dm_horz) { + MinimumSwathHeightY = MaximumSwathHeightY / 2.0; + MinimumSwathHeightC = MaximumSwathHeightC; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10 + && mode_lib->vba.SourceScan[k] == dm_horz) { + MinimumSwathHeightC = MaximumSwathHeightC / 2.0; + MinimumSwathHeightY = MaximumSwathHeightY; + } else { + MinimumSwathHeightY = MaximumSwathHeightY; + MinimumSwathHeightC = MaximumSwathHeightC; + } + } + + if (mode_lib->vba.SourceScan[k] == dm_horz) { + SwathWidth = mode_lib->vba.ViewportWidth[k]; + } else { + SwathWidth = mode_lib->vba.ViewportHeight[k]; + } + + if (mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) { + MainPlaneDoesODMCombine = true; + } + for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) { + if (mode_lib->vba.BlendingAndTiming[k] == j + && mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) { + MainPlaneDoesODMCombine = true; + } + } + + if (MainPlaneDoesODMCombine == true) { + SwathWidth = dml_min( + SwathWidth, + mode_lib->vba.HActive[k] / 2.0 * mode_lib->vba.HRatio[k]); + } else { + if (mode_lib->vba.DPPPerPlane[k] == 0) + SwathWidth = 0; + else + SwathWidth = SwathWidth / mode_lib->vba.DPPPerPlane[k]; + } + + SwathWidthGranularityY = 256 / dml_ceil(BytePerPixDETY, 1) / MaximumSwathHeightY; + RoundedUpMaxSwathSizeBytesY = (dml_ceil( + (double) (SwathWidth - 1), + SwathWidthGranularityY) + SwathWidthGranularityY) * BytePerPixDETY + * MaximumSwathHeightY; + if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10) { + RoundedUpMaxSwathSizeBytesY = dml_ceil(RoundedUpMaxSwathSizeBytesY, 256) + + 256; + } + if (MaximumSwathHeightC > 0) { + SwathWidthGranularityC = 256.0 / dml_ceil(BytePerPixDETC, 2) + / MaximumSwathHeightC; + RoundedUpMaxSwathSizeBytesC = (dml_ceil( + (double) (SwathWidth / 2.0 - 1), + SwathWidthGranularityC) + SwathWidthGranularityC) + * BytePerPixDETC * MaximumSwathHeightC; + if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10) { + RoundedUpMaxSwathSizeBytesC = dml_ceil( + RoundedUpMaxSwathSizeBytesC, + 256) + 256; + } + } else + RoundedUpMaxSwathSizeBytesC = 0.0; + + if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC + <= mode_lib->vba.DETBufferSizeInKByte[0] * 1024.0 / 2.0) { + mode_lib->vba.SwathHeightY[k] = MaximumSwathHeightY; + mode_lib->vba.SwathHeightC[k] = MaximumSwathHeightC; + } else { + mode_lib->vba.SwathHeightY[k] = MinimumSwathHeightY; + mode_lib->vba.SwathHeightC[k] = MinimumSwathHeightC; + } + + if (mode_lib->vba.SwathHeightC[k] == 0) { + mode_lib->vba.DETBufferSizeY[k] = mode_lib->vba.DETBufferSizeInKByte[0] * 1024; + mode_lib->vba.DETBufferSizeC[k] = 0; + } else if (mode_lib->vba.SwathHeightY[k] <= mode_lib->vba.SwathHeightC[k]) { + mode_lib->vba.DETBufferSizeY[k] = mode_lib->vba.DETBufferSizeInKByte[0] + * 1024.0 / 2; + mode_lib->vba.DETBufferSizeC[k] = mode_lib->vba.DETBufferSizeInKByte[0] + * 1024.0 / 2; + } else { + mode_lib->vba.DETBufferSizeY[k] = mode_lib->vba.DETBufferSizeInKByte[0] + * 1024.0 * 2 / 3; + mode_lib->vba.DETBufferSizeC[k] = mode_lib->vba.DETBufferSizeInKByte[0] + * 1024.0 / 3; + } + } +} + +static double CalculateTWait( + unsigned int PrefetchMode, + double DRAMClockChangeLatency, + double UrgentLatencyPixelDataOnly, + double SREnterPlusExitTime) +{ + if (PrefetchMode == 0) { + return dml_max( + DRAMClockChangeLatency + UrgentLatencyPixelDataOnly, + dml_max(SREnterPlusExitTime, UrgentLatencyPixelDataOnly)); + } else if (PrefetchMode == 1) { + return dml_max(SREnterPlusExitTime, UrgentLatencyPixelDataOnly); + } else { + return UrgentLatencyPixelDataOnly; + } +} + +static double CalculateRemoteSurfaceFlipDelay( + struct display_mode_lib *mode_lib, + double VRatio, + double SwathWidth, + double Bpp, + double LineTime, + double XFCTSlvVupdateOffset, + double XFCTSlvVupdateWidth, + double XFCTSlvVreadyOffset, + double XFCXBUFLatencyTolerance, + double XFCFillBWOverhead, + double XFCSlvChunkSize, + double XFCBusTransportTime, + double TCalc, + double TWait, + double *SrcActiveDrainRate, + double *TInitXFill, + double *TslvChk) +{ + double TSlvSetup, AvgfillRate, result; + + *SrcActiveDrainRate = VRatio * SwathWidth * Bpp / LineTime; + TSlvSetup = XFCTSlvVupdateOffset + XFCTSlvVupdateWidth + XFCTSlvVreadyOffset; + *TInitXFill = XFCXBUFLatencyTolerance / (1 + XFCFillBWOverhead / 100); + AvgfillRate = *SrcActiveDrainRate * (1 + XFCFillBWOverhead / 100); + *TslvChk = XFCSlvChunkSize / AvgfillRate; + dml_print( + "DML::CalculateRemoteSurfaceFlipDelay: SrcActiveDrainRate: %f\n", + *SrcActiveDrainRate); + dml_print("DML::CalculateRemoteSurfaceFlipDelay: TSlvSetup: %f\n", TSlvSetup); + dml_print("DML::CalculateRemoteSurfaceFlipDelay: TInitXFill: %f\n", *TInitXFill); + dml_print("DML::CalculateRemoteSurfaceFlipDelay: AvgfillRate: %f\n", AvgfillRate); + dml_print("DML::CalculateRemoteSurfaceFlipDelay: TslvChk: %f\n", *TslvChk); + result = 2 * XFCBusTransportTime + TSlvSetup + TCalc + TWait + *TslvChk + *TInitXFill; // TODO: This doesn't seem to match programming guide + dml_print("DML::CalculateRemoteSurfaceFlipDelay: RemoteSurfaceFlipDelay: %f\n", result); + return result; +} + +static double CalculateWriteBackDelay( + enum source_format_class WritebackPixelFormat, + double WritebackHRatio, + double WritebackVRatio, + unsigned int WritebackLumaHTaps, + unsigned int WritebackLumaVTaps, + unsigned int WritebackChromaHTaps, + unsigned int WritebackChromaVTaps, + unsigned int WritebackDestinationWidth) +{ + double CalculateWriteBackDelay = + dml_max( + dml_ceil(WritebackLumaHTaps / 4.0, 1) / WritebackHRatio, + WritebackLumaVTaps * dml_ceil(1.0 / WritebackVRatio, 1) + * dml_ceil( + WritebackDestinationWidth + / 4.0, + 1) + + dml_ceil(1.0 / WritebackVRatio, 1) + * (dml_ceil( + WritebackLumaVTaps + / 4.0, + 1) + 4)); + + if (WritebackPixelFormat != dm_444_32) { + CalculateWriteBackDelay = + dml_max( + CalculateWriteBackDelay, + dml_max( + dml_ceil( + WritebackChromaHTaps + / 2.0, + 1) + / (2 + * WritebackHRatio), + WritebackChromaVTaps + * dml_ceil( + 1 + / (2 + * WritebackVRatio), + 1) + * dml_ceil( + WritebackDestinationWidth + / 2.0 + / 2.0, + 1) + + dml_ceil( + 1 + / (2 + * WritebackVRatio), + 1) + * (dml_ceil( + WritebackChromaVTaps + / 4.0, + 1) + + 4))); + } + return CalculateWriteBackDelay; +} + +static void CalculateActiveRowBandwidth( + bool GPUVMEnable, + enum source_format_class SourcePixelFormat, + double VRatio, + bool DCCEnable, + double LineTime, + unsigned int MetaRowByteLuma, + unsigned int MetaRowByteChroma, + unsigned int meta_row_height_luma, + unsigned int meta_row_height_chroma, + unsigned int PixelPTEBytesPerRowLuma, + unsigned int PixelPTEBytesPerRowChroma, + unsigned int dpte_row_height_luma, + unsigned int dpte_row_height_chroma, + double *meta_row_bw, + double *dpte_row_bw, + double *qual_row_bw) +{ + if (DCCEnable != true) { + *meta_row_bw = 0; + } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10) { + *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + + VRatio / 2 * MetaRowByteChroma + / (meta_row_height_chroma * LineTime); + } else { + *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime); + } + + if (GPUVMEnable != true) { + *dpte_row_bw = 0; + } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10) { + *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) + + VRatio / 2 * PixelPTEBytesPerRowChroma + / (dpte_row_height_chroma * LineTime); + } else { + *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime); + } + + if ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10)) { + *qual_row_bw = *meta_row_bw + *dpte_row_bw; + } else { + *qual_row_bw = 0; + } +} + +static void CalculateFlipSchedule( + struct display_mode_lib *mode_lib, + double UrgentExtraLatency, + double UrgentLatencyPixelDataOnly, + unsigned int GPUVMMaxPageTableLevels, + bool GPUVMEnable, + double BandwidthAvailableForImmediateFlip, + unsigned int TotImmediateFlipBytes, + enum source_format_class SourcePixelFormat, + unsigned int ImmediateFlipBytes, + double LineTime, + double VRatio, + double Tno_bw, + double PDEAndMetaPTEBytesFrame, + unsigned int MetaRowByte, + unsigned int PixelPTEBytesPerRow, + bool DCCEnable, + unsigned int dpte_row_height, + unsigned int meta_row_height, + double qual_row_bw, + double *DestinationLinesToRequestVMInImmediateFlip, + double *DestinationLinesToRequestRowInImmediateFlip, + double *final_flip_bw, + bool *ImmediateFlipSupportedForPipe) +{ + double min_row_time = 0.0; + + if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10) { + *DestinationLinesToRequestVMInImmediateFlip = 0.0; + *DestinationLinesToRequestRowInImmediateFlip = 0.0; + *final_flip_bw = qual_row_bw; + *ImmediateFlipSupportedForPipe = true; + } else { + double TimeForFetchingMetaPTEImmediateFlip; + double TimeForFetchingRowInVBlankImmediateFlip; + + if (GPUVMEnable == true) { + mode_lib->vba.ImmediateFlipBW[0] = BandwidthAvailableForImmediateFlip + * ImmediateFlipBytes / TotImmediateFlipBytes; + TimeForFetchingMetaPTEImmediateFlip = + dml_max( + Tno_bw + + PDEAndMetaPTEBytesFrame + / mode_lib->vba.ImmediateFlipBW[0], + dml_max( + UrgentExtraLatency + + UrgentLatencyPixelDataOnly + * (GPUVMMaxPageTableLevels + - 1), + LineTime / 4.0)); + } else { + TimeForFetchingMetaPTEImmediateFlip = 0; + } + + *DestinationLinesToRequestVMInImmediateFlip = dml_floor( + 4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime + 0.125), + 1) / 4.0; + + if ((GPUVMEnable || DCCEnable)) { + mode_lib->vba.ImmediateFlipBW[0] = BandwidthAvailableForImmediateFlip + * ImmediateFlipBytes / TotImmediateFlipBytes; + TimeForFetchingRowInVBlankImmediateFlip = dml_max( + (MetaRowByte + PixelPTEBytesPerRow) + / mode_lib->vba.ImmediateFlipBW[0], + dml_max(UrgentLatencyPixelDataOnly, LineTime / 4.0)); + } else { + TimeForFetchingRowInVBlankImmediateFlip = 0; + } + + *DestinationLinesToRequestRowInImmediateFlip = dml_floor( + 4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime + 0.125), + 1) / 4.0; + + if (GPUVMEnable == true) { + *final_flip_bw = + dml_max( + PDEAndMetaPTEBytesFrame + / (*DestinationLinesToRequestVMInImmediateFlip + * LineTime), + (MetaRowByte + PixelPTEBytesPerRow) + / (TimeForFetchingRowInVBlankImmediateFlip + * LineTime)); + } else if (MetaRowByte + PixelPTEBytesPerRow > 0) { + *final_flip_bw = (MetaRowByte + PixelPTEBytesPerRow) + / (TimeForFetchingRowInVBlankImmediateFlip * LineTime); + } else { + *final_flip_bw = 0; + } + + if (GPUVMEnable && !DCCEnable) + min_row_time = dpte_row_height * LineTime / VRatio; + else if (!GPUVMEnable && DCCEnable) + min_row_time = meta_row_height * LineTime / VRatio; + else + min_row_time = dml_min(dpte_row_height, meta_row_height) * LineTime + / VRatio; + + if (*DestinationLinesToRequestVMInImmediateFlip >= 8 + || *DestinationLinesToRequestRowInImmediateFlip >= 16 + || TimeForFetchingMetaPTEImmediateFlip + + 2 * TimeForFetchingRowInVBlankImmediateFlip + > min_row_time) + *ImmediateFlipSupportedForPipe = false; + else + *ImmediateFlipSupportedForPipe = true; + } +} + +static unsigned int TruncToValidBPP( + double DecimalBPP, + bool DSCEnabled, + enum output_encoder_class Output, + enum output_format_class Format, + unsigned int DSCInputBitPerComponent) +{ + if (Output == dm_hdmi) { + if (Format == dm_420) { + if (DecimalBPP >= 18) + return 18; + else if (DecimalBPP >= 15) + return 15; + else if (DecimalBPP >= 12) + return 12; + else + return BPP_INVALID; + } else if (Format == dm_444) { + if (DecimalBPP >= 36) + return 36; + else if (DecimalBPP >= 30) + return 30; + else if (DecimalBPP >= 24) + return 24; + else if (DecimalBPP >= 18) + return 18; + else + return BPP_INVALID; + } else { + if (DecimalBPP / 1.5 >= 24) + return 24; + else if (DecimalBPP / 1.5 >= 20) + return 20; + else if (DecimalBPP / 1.5 >= 16) + return 16; + else + return BPP_INVALID; + } + } else { + if (DSCEnabled) { + if (Format == dm_420) { + if (DecimalBPP < 6) + return BPP_INVALID; + else if (DecimalBPP >= 1.5 * DSCInputBitPerComponent - 1 / 16) + return 1.5 * DSCInputBitPerComponent - 1 / 16; + else + return dml_floor(16 * DecimalBPP, 1) / 16; + } else if (Format == dm_n422) { + if (DecimalBPP < 7) + return BPP_INVALID; + else if (DecimalBPP >= 2 * DSCInputBitPerComponent - 1 / 16) + return 2 * DSCInputBitPerComponent - 1 / 16; + else + return dml_floor(16 * DecimalBPP, 1) / 16; + } else { + if (DecimalBPP < 8) + return BPP_INVALID; + else if (DecimalBPP >= 3 * DSCInputBitPerComponent - 1 / 16) + return 3 * DSCInputBitPerComponent - 1 / 16; + else + return dml_floor(16 * DecimalBPP, 1) / 16; + } + } else if (Format == dm_420) { + if (DecimalBPP >= 18) + return 18; + else if (DecimalBPP >= 15) + return 15; + else if (DecimalBPP >= 12) + return 12; + else + return BPP_INVALID; + } else if (Format == dm_s422 || Format == dm_n422) { + if (DecimalBPP >= 24) + return 24; + else if (DecimalBPP >= 20) + return 20; + else if (DecimalBPP >= 16) + return 16; + else + return BPP_INVALID; + } else { + if (DecimalBPP >= 36) + return 36; + else if (DecimalBPP >= 30) + return 30; + else if (DecimalBPP >= 24) + return 24; + else if (DecimalBPP >= 18) + return 18; + else + return BPP_INVALID; + } + } +} + +void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib) +{ + struct vba_vars_st *locals = &mode_lib->vba; + + int i; + unsigned int j, k, m; + + /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/ + + /*Scale Ratio, taps Support Check*/ + + mode_lib->vba.ScaleRatioAndTapsSupport = true; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.ScalerEnabled[k] == false + && ((mode_lib->vba.SourcePixelFormat[k] != dm_444_64 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_32 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_16 + && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16 + && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8) + || mode_lib->vba.HRatio[k] != 1.0 + || mode_lib->vba.htaps[k] != 1.0 + || mode_lib->vba.VRatio[k] != 1.0 + || mode_lib->vba.vtaps[k] != 1.0)) { + mode_lib->vba.ScaleRatioAndTapsSupport = false; + } else if (mode_lib->vba.vtaps[k] < 1.0 || mode_lib->vba.vtaps[k] > 8.0 + || mode_lib->vba.htaps[k] < 1.0 || mode_lib->vba.htaps[k] > 8.0 + || (mode_lib->vba.htaps[k] > 1.0 + && (mode_lib->vba.htaps[k] % 2) == 1) + || mode_lib->vba.HRatio[k] > mode_lib->vba.MaxHSCLRatio + || mode_lib->vba.VRatio[k] > mode_lib->vba.MaxVSCLRatio + || mode_lib->vba.HRatio[k] > mode_lib->vba.htaps[k] + || mode_lib->vba.VRatio[k] > mode_lib->vba.vtaps[k] + || (mode_lib->vba.SourcePixelFormat[k] != dm_444_64 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_32 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_16 + && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16 + && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8 + && (mode_lib->vba.HRatio[k] / 2.0 + > mode_lib->vba.HTAPsChroma[k] + || mode_lib->vba.VRatio[k] / 2.0 + > mode_lib->vba.VTAPsChroma[k]))) { + mode_lib->vba.ScaleRatioAndTapsSupport = false; + } + } + /*Source Format, Pixel Format and Scan Support Check*/ + + mode_lib->vba.SourceFormatPixelAndScanSupport = true; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if ((mode_lib->vba.SurfaceTiling[k] == dm_sw_linear + && mode_lib->vba.SourceScan[k] != dm_horz) + || ((mode_lib->vba.SurfaceTiling[k] == dm_sw_4kb_d + || mode_lib->vba.SurfaceTiling[k] == dm_sw_4kb_d_x + || mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_d + || mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_d_t + || mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_d_x + || mode_lib->vba.SurfaceTiling[k] == dm_sw_var_d + || mode_lib->vba.SurfaceTiling[k] == dm_sw_var_d_x) + && mode_lib->vba.SourcePixelFormat[k] != dm_444_64) + || (mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_r_x + && (mode_lib->vba.SourcePixelFormat[k] == dm_mono_8 + || mode_lib->vba.SourcePixelFormat[k] + == dm_420_8 + || mode_lib->vba.SourcePixelFormat[k] + == dm_420_10)) + || (((mode_lib->vba.SurfaceTiling[k] == dm_sw_gfx7_2d_thin_gl + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_gfx7_2d_thin_l_vp) + && !((mode_lib->vba.SourcePixelFormat[k] + == dm_444_64 + || mode_lib->vba.SourcePixelFormat[k] + == dm_444_32) + && mode_lib->vba.SourceScan[k] + == dm_horz + && mode_lib->vba.SupportGFX7CompatibleTilingIn32bppAnd64bpp + == true + && mode_lib->vba.DCCEnable[k] + == false)) + || (mode_lib->vba.DCCEnable[k] == true + && (mode_lib->vba.SurfaceTiling[k] + == dm_sw_linear + || mode_lib->vba.SourcePixelFormat[k] + == dm_420_8 + || mode_lib->vba.SourcePixelFormat[k] + == dm_420_10)))) { + mode_lib->vba.SourceFormatPixelAndScanSupport = false; + } + } + /*Bandwidth Support Check*/ + + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.SourcePixelFormat[k] == dm_444_64) { + locals->BytePerPixelInDETY[k] = 8.0; + locals->BytePerPixelInDETC[k] = 0.0; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_32) { + locals->BytePerPixelInDETY[k] = 4.0; + locals->BytePerPixelInDETC[k] = 0.0; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_16 + || mode_lib->vba.SourcePixelFormat[k] == dm_mono_16) { + locals->BytePerPixelInDETY[k] = 2.0; + locals->BytePerPixelInDETC[k] = 0.0; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_mono_8) { + locals->BytePerPixelInDETY[k] = 1.0; + locals->BytePerPixelInDETC[k] = 0.0; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8) { + locals->BytePerPixelInDETY[k] = 1.0; + locals->BytePerPixelInDETC[k] = 2.0; + } else { + locals->BytePerPixelInDETY[k] = 4.0 / 3; + locals->BytePerPixelInDETC[k] = 8.0 / 3; + } + if (mode_lib->vba.SourceScan[k] == dm_horz) { + locals->SwathWidthYSingleDPP[k] = mode_lib->vba.ViewportWidth[k]; + } else { + locals->SwathWidthYSingleDPP[k] = mode_lib->vba.ViewportHeight[k]; + } + } + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + locals->ReadBandwidthLuma[k] = locals->SwathWidthYSingleDPP[k] * dml_ceil(locals->BytePerPixelInDETY[k], 1.0) + / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k]; + locals->ReadBandwidthChroma[k] = locals->SwathWidthYSingleDPP[k] / 2 * dml_ceil(locals->BytePerPixelInDETC[k], 2.0) + / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k] / 2.0; + locals->ReadBandwidth[k] = locals->ReadBandwidthLuma[k] + locals->ReadBandwidthChroma[k]; + } + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.WritebackEnable[k] == true + && mode_lib->vba.WritebackPixelFormat[k] == dm_444_32) { + locals->WriteBandwidth[k] = mode_lib->vba.WritebackDestinationWidth[k] + * mode_lib->vba.WritebackDestinationHeight[k] + / (mode_lib->vba.WritebackSourceHeight[k] + * mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]) * 4.0; + } else if (mode_lib->vba.WritebackEnable[k] == true + && mode_lib->vba.WritebackPixelFormat[k] == dm_420_10) { + locals->WriteBandwidth[k] = mode_lib->vba.WritebackDestinationWidth[k] + * mode_lib->vba.WritebackDestinationHeight[k] + / (mode_lib->vba.WritebackSourceHeight[k] + * mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]) * 3.0; + } else if (mode_lib->vba.WritebackEnable[k] == true) { + locals->WriteBandwidth[k] = mode_lib->vba.WritebackDestinationWidth[k] + * mode_lib->vba.WritebackDestinationHeight[k] + / (mode_lib->vba.WritebackSourceHeight[k] + * mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]) * 1.5; + } else { + locals->WriteBandwidth[k] = 0.0; + } + } + mode_lib->vba.DCCEnabledInAnyPlane = false; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.DCCEnable[k] == true) { + mode_lib->vba.DCCEnabledInAnyPlane = true; + } + } + mode_lib->vba.UrgentLatency = mode_lib->vba.UrgentLatencyPixelDataOnly; + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { + locals->FabricAndDRAMBandwidthPerState[i] = dml_min( + mode_lib->vba.DRAMSpeedPerState[i] * mode_lib->vba.NumberOfChannels + * mode_lib->vba.DRAMChannelWidth, + mode_lib->vba.FabricClockPerState[i] + * mode_lib->vba.FabricDatapathToDCNDataReturn) / 1000; + locals->ReturnBWToDCNPerState = dml_min(locals->ReturnBusWidth * locals->DCFCLKPerState[i], + locals->FabricAndDRAMBandwidthPerState[i] * 1000) + * locals->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly / 100; + + locals->ReturnBWPerState[i][0] = locals->ReturnBWToDCNPerState; + + if (locals->DCCEnabledInAnyPlane == true && locals->ReturnBWToDCNPerState > locals->DCFCLKPerState[i] * locals->ReturnBusWidth / 4) { + locals->ReturnBWPerState[i][0] = dml_min(locals->ReturnBWPerState[i][0], + locals->ReturnBWToDCNPerState * 4 * (1 - locals->UrgentLatency / + ((locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024 + / (locals->ReturnBWToDCNPerState - locals->DCFCLKPerState[i] + * locals->ReturnBusWidth / 4) + locals->UrgentLatency))); + } + locals->CriticalPoint = 2 * locals->ReturnBusWidth * locals->DCFCLKPerState[i] * + locals->UrgentLatency / (locals->ReturnBWToDCNPerState * locals->UrgentLatency + + (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024); + + if (locals->DCCEnabledInAnyPlane && locals->CriticalPoint > 1 && locals->CriticalPoint < 4) { + locals->ReturnBWPerState[i][0] = dml_min(locals->ReturnBWPerState[i][0], + 4 * locals->ReturnBWToDCNPerState * + (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024 + * locals->ReturnBusWidth * locals->DCFCLKPerState[i] * locals->UrgentLatency / + dml_pow((locals->ReturnBWToDCNPerState * locals->UrgentLatency + + (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024), 2)); + } + + locals->ReturnBWToDCNPerState = dml_min(locals->ReturnBusWidth * + locals->DCFCLKPerState[i], locals->FabricAndDRAMBandwidthPerState[i] * 1000); + + if (locals->DCCEnabledInAnyPlane == true && locals->ReturnBWToDCNPerState > locals->DCFCLKPerState[i] * locals->ReturnBusWidth / 4) { + locals->ReturnBWPerState[i][0] = dml_min(locals->ReturnBWPerState[i][0], + locals->ReturnBWToDCNPerState * 4 * (1 - locals->UrgentLatency / + ((locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024 + / (locals->ReturnBWToDCNPerState - locals->DCFCLKPerState[i] + * locals->ReturnBusWidth / 4) + locals->UrgentLatency))); + } + locals->CriticalPoint = 2 * locals->ReturnBusWidth * locals->DCFCLKPerState[i] * + locals->UrgentLatency / (locals->ReturnBWToDCNPerState * locals->UrgentLatency + + (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024); + + if (locals->DCCEnabledInAnyPlane && locals->CriticalPoint > 1 && locals->CriticalPoint < 4) { + locals->ReturnBWPerState[i][0] = dml_min(locals->ReturnBWPerState[i][0], + 4 * locals->ReturnBWToDCNPerState * + (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024 + * locals->ReturnBusWidth * locals->DCFCLKPerState[i] * locals->UrgentLatency / + dml_pow((locals->ReturnBWToDCNPerState * locals->UrgentLatency + + (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024), 2)); + } + } + /*Writeback Latency support check*/ + + mode_lib->vba.WritebackLatencySupport = true; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.WritebackEnable[k] == true) { + if (mode_lib->vba.WritebackPixelFormat[k] == dm_444_32) { + if (locals->WriteBandwidth[k] + > (mode_lib->vba.WritebackInterfaceLumaBufferSize + + mode_lib->vba.WritebackInterfaceChromaBufferSize) + / mode_lib->vba.WritebackLatency) { + mode_lib->vba.WritebackLatencySupport = false; + } + } else { + if (locals->WriteBandwidth[k] + > 1.5 + * dml_min( + mode_lib->vba.WritebackInterfaceLumaBufferSize, + 2.0 + * mode_lib->vba.WritebackInterfaceChromaBufferSize) + / mode_lib->vba.WritebackLatency) { + mode_lib->vba.WritebackLatencySupport = false; + } + } + } + } + /*Re-ordering Buffer Support Check*/ + + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { + locals->UrgentRoundTripAndOutOfOrderLatencyPerState[i] = + (mode_lib->vba.RoundTripPingLatencyCycles + 32.0) / mode_lib->vba.DCFCLKPerState[i] + + locals->UrgentOutOfOrderReturnPerChannel * mode_lib->vba.NumberOfChannels / locals->ReturnBWPerState[i][0]; + if ((mode_lib->vba.ROBBufferSizeInKByte - mode_lib->vba.PixelChunkSizeInKByte) * 1024.0 / locals->ReturnBWPerState[i][0] + > locals->UrgentRoundTripAndOutOfOrderLatencyPerState[i]) { + locals->ROBSupport[i][0] = true; + } else { + locals->ROBSupport[i][0] = false; + } + } + /*Writeback Mode Support Check*/ + + mode_lib->vba.TotalNumberOfActiveWriteback = 0; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.WritebackEnable[k] == true) { + if (mode_lib->vba.ActiveWritebacksPerPlane[k] == 0) + mode_lib->vba.ActiveWritebacksPerPlane[k] = 1; + mode_lib->vba.TotalNumberOfActiveWriteback = + mode_lib->vba.TotalNumberOfActiveWriteback + + mode_lib->vba.ActiveWritebacksPerPlane[k]; + } + } + mode_lib->vba.WritebackModeSupport = true; + if (mode_lib->vba.TotalNumberOfActiveWriteback > mode_lib->vba.MaxNumWriteback) { + mode_lib->vba.WritebackModeSupport = false; + } + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.WritebackEnable[k] == true + && mode_lib->vba.Writeback10bpc420Supported != true + && mode_lib->vba.WritebackPixelFormat[k] == dm_420_10) { + mode_lib->vba.WritebackModeSupport = false; + } + } + /*Writeback Scale Ratio and Taps Support Check*/ + + mode_lib->vba.WritebackScaleRatioAndTapsSupport = true; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.WritebackEnable[k] == true) { + if (mode_lib->vba.WritebackLumaAndChromaScalingSupported == false + && (mode_lib->vba.WritebackHRatio[k] != 1.0 + || mode_lib->vba.WritebackVRatio[k] != 1.0)) { + mode_lib->vba.WritebackScaleRatioAndTapsSupport = false; + } + if (mode_lib->vba.WritebackHRatio[k] > mode_lib->vba.WritebackMaxHSCLRatio + || mode_lib->vba.WritebackVRatio[k] + > mode_lib->vba.WritebackMaxVSCLRatio + || mode_lib->vba.WritebackHRatio[k] + < mode_lib->vba.WritebackMinHSCLRatio + || mode_lib->vba.WritebackVRatio[k] + < mode_lib->vba.WritebackMinVSCLRatio + || mode_lib->vba.WritebackLumaHTaps[k] + > mode_lib->vba.WritebackMaxHSCLTaps + || mode_lib->vba.WritebackLumaVTaps[k] + > mode_lib->vba.WritebackMaxVSCLTaps + || mode_lib->vba.WritebackHRatio[k] + > mode_lib->vba.WritebackLumaHTaps[k] + || mode_lib->vba.WritebackVRatio[k] + > mode_lib->vba.WritebackLumaVTaps[k] + || (mode_lib->vba.WritebackLumaHTaps[k] > 2.0 + && ((mode_lib->vba.WritebackLumaHTaps[k] % 2) + == 1)) + || (mode_lib->vba.WritebackPixelFormat[k] != dm_444_32 + && (mode_lib->vba.WritebackChromaHTaps[k] + > mode_lib->vba.WritebackMaxHSCLTaps + || mode_lib->vba.WritebackChromaVTaps[k] + > mode_lib->vba.WritebackMaxVSCLTaps + || 2.0 + * mode_lib->vba.WritebackHRatio[k] + > mode_lib->vba.WritebackChromaHTaps[k] + || 2.0 + * mode_lib->vba.WritebackVRatio[k] + > mode_lib->vba.WritebackChromaVTaps[k] + || (mode_lib->vba.WritebackChromaHTaps[k] > 2.0 + && ((mode_lib->vba.WritebackChromaHTaps[k] % 2) == 1))))) { + mode_lib->vba.WritebackScaleRatioAndTapsSupport = false; + } + if (mode_lib->vba.WritebackVRatio[k] < 1.0) { + mode_lib->vba.WritebackLumaVExtra = + dml_max(1.0 - 2.0 / dml_ceil(1.0 / mode_lib->vba.WritebackVRatio[k], 1.0), 0.0); + } else { + mode_lib->vba.WritebackLumaVExtra = -1; + } + if ((mode_lib->vba.WritebackPixelFormat[k] == dm_444_32 + && mode_lib->vba.WritebackLumaVTaps[k] + > (mode_lib->vba.WritebackLineBufferLumaBufferSize + + mode_lib->vba.WritebackLineBufferChromaBufferSize) + / 3.0 + / mode_lib->vba.WritebackDestinationWidth[k] + - mode_lib->vba.WritebackLumaVExtra) + || (mode_lib->vba.WritebackPixelFormat[k] == dm_420_8 + && mode_lib->vba.WritebackLumaVTaps[k] + > mode_lib->vba.WritebackLineBufferLumaBufferSize + * 8.0 / 10.0 / mode_lib->vba.WritebackDestinationWidth[k] + - mode_lib->vba.WritebackLumaVExtra) + || (mode_lib->vba.WritebackPixelFormat[k] == dm_420_10 + && mode_lib->vba.WritebackLumaVTaps[k] + > mode_lib->vba.WritebackLineBufferLumaBufferSize + * 8.0 / 10.0 + / mode_lib->vba.WritebackDestinationWidth[k] + - mode_lib->vba.WritebackLumaVExtra)) { + mode_lib->vba.WritebackScaleRatioAndTapsSupport = false; + } + if (2.0 * mode_lib->vba.WritebackVRatio[k] < 1) { + mode_lib->vba.WritebackChromaVExtra = 0.0; + } else { + mode_lib->vba.WritebackChromaVExtra = -1; + } + if ((mode_lib->vba.WritebackPixelFormat[k] == dm_420_8 + && mode_lib->vba.WritebackChromaVTaps[k] + > mode_lib->vba.WritebackLineBufferChromaBufferSize + * 8.0 / 10.0 / mode_lib->vba.WritebackDestinationWidth[k] + - mode_lib->vba.WritebackChromaVExtra) + || (mode_lib->vba.WritebackPixelFormat[k] == dm_420_10 + && mode_lib->vba.WritebackChromaVTaps[k] + > mode_lib->vba.WritebackLineBufferChromaBufferSize + * 8.0 / 10.0 + / mode_lib->vba.WritebackDestinationWidth[k] + - mode_lib->vba.WritebackChromaVExtra)) { + mode_lib->vba.WritebackScaleRatioAndTapsSupport = false; + } + } + } + /*Maximum DISPCLK/DPPCLK Support check*/ + + mode_lib->vba.WritebackRequiredDISPCLK = 0.0; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.WritebackEnable[k] == true) { + mode_lib->vba.WritebackRequiredDISPCLK = + dml_max( + mode_lib->vba.WritebackRequiredDISPCLK, + CalculateWriteBackDISPCLK( + mode_lib->vba.WritebackPixelFormat[k], + mode_lib->vba.PixelClock[k], + mode_lib->vba.WritebackHRatio[k], + mode_lib->vba.WritebackVRatio[k], + mode_lib->vba.WritebackLumaHTaps[k], + mode_lib->vba.WritebackLumaVTaps[k], + mode_lib->vba.WritebackChromaHTaps[k], + mode_lib->vba.WritebackChromaVTaps[k], + mode_lib->vba.WritebackDestinationWidth[k], + mode_lib->vba.HTotal[k], + mode_lib->vba.WritebackChromaLineBufferWidth)); + } + } + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.HRatio[k] > 1.0) { + locals->PSCL_FACTOR[k] = dml_min( + mode_lib->vba.MaxDCHUBToPSCLThroughput, + mode_lib->vba.MaxPSCLToLBThroughput + * mode_lib->vba.HRatio[k] + / dml_ceil( + mode_lib->vba.htaps[k] + / 6.0, + 1.0)); + } else { + locals->PSCL_FACTOR[k] = dml_min( + mode_lib->vba.MaxDCHUBToPSCLThroughput, + mode_lib->vba.MaxPSCLToLBThroughput); + } + if (locals->BytePerPixelInDETC[k] == 0.0) { + locals->PSCL_FACTOR_CHROMA[k] = 0.0; + locals->MinDPPCLKUsingSingleDPP[k] = + mode_lib->vba.PixelClock[k] + * dml_max3( + mode_lib->vba.vtaps[k] / 6.0 + * dml_min( + 1.0, + mode_lib->vba.HRatio[k]), + mode_lib->vba.HRatio[k] + * mode_lib->vba.VRatio[k] + / locals->PSCL_FACTOR[k], + 1.0); + if ((mode_lib->vba.htaps[k] > 6.0 || mode_lib->vba.vtaps[k] > 6.0) + && locals->MinDPPCLKUsingSingleDPP[k] + < 2.0 * mode_lib->vba.PixelClock[k]) { + locals->MinDPPCLKUsingSingleDPP[k] = 2.0 + * mode_lib->vba.PixelClock[k]; + } + } else { + if (mode_lib->vba.HRatio[k] / 2.0 > 1.0) { + locals->PSCL_FACTOR_CHROMA[k] = + dml_min( + mode_lib->vba.MaxDCHUBToPSCLThroughput, + mode_lib->vba.MaxPSCLToLBThroughput + * mode_lib->vba.HRatio[k] + / 2.0 + / dml_ceil( + mode_lib->vba.HTAPsChroma[k] + / 6.0, + 1.0)); + } else { + locals->PSCL_FACTOR_CHROMA[k] = dml_min( + mode_lib->vba.MaxDCHUBToPSCLThroughput, + mode_lib->vba.MaxPSCLToLBThroughput); + } + locals->MinDPPCLKUsingSingleDPP[k] = + mode_lib->vba.PixelClock[k] + * dml_max5( + mode_lib->vba.vtaps[k] / 6.0 + * dml_min( + 1.0, + mode_lib->vba.HRatio[k]), + mode_lib->vba.HRatio[k] + * mode_lib->vba.VRatio[k] + / locals->PSCL_FACTOR[k], + mode_lib->vba.VTAPsChroma[k] + / 6.0 + * dml_min( + 1.0, + mode_lib->vba.HRatio[k] + / 2.0), + mode_lib->vba.HRatio[k] + * mode_lib->vba.VRatio[k] + / 4.0 + / locals->PSCL_FACTOR_CHROMA[k], + 1.0); + if ((mode_lib->vba.htaps[k] > 6.0 || mode_lib->vba.vtaps[k] > 6.0 + || mode_lib->vba.HTAPsChroma[k] > 6.0 + || mode_lib->vba.VTAPsChroma[k] > 6.0) + && locals->MinDPPCLKUsingSingleDPP[k] + < 2.0 * mode_lib->vba.PixelClock[k]) { + locals->MinDPPCLKUsingSingleDPP[k] = 2.0 + * mode_lib->vba.PixelClock[k]; + } + } + } + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + Calculate256BBlockSizes( + mode_lib->vba.SourcePixelFormat[k], + mode_lib->vba.SurfaceTiling[k], + dml_ceil(locals->BytePerPixelInDETY[k], 1.0), + dml_ceil(locals->BytePerPixelInDETC[k], 2.0), + &locals->Read256BlockHeightY[k], + &locals->Read256BlockHeightC[k], + &locals->Read256BlockWidthY[k], + &locals->Read256BlockWidthC[k]); + if (mode_lib->vba.SourceScan[k] == dm_horz) { + locals->MaxSwathHeightY[k] = locals->Read256BlockHeightY[k]; + locals->MaxSwathHeightC[k] = locals->Read256BlockHeightC[k]; + } else { + locals->MaxSwathHeightY[k] = locals->Read256BlockWidthY[k]; + locals->MaxSwathHeightC[k] = locals->Read256BlockWidthC[k]; + } + if ((mode_lib->vba.SourcePixelFormat[k] == dm_444_64 + || mode_lib->vba.SourcePixelFormat[k] == dm_444_32 + || mode_lib->vba.SourcePixelFormat[k] == dm_444_16 + || mode_lib->vba.SourcePixelFormat[k] == dm_mono_16 + || mode_lib->vba.SourcePixelFormat[k] == dm_mono_8)) { + if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear + || (mode_lib->vba.SourcePixelFormat[k] == dm_444_64 + && (mode_lib->vba.SurfaceTiling[k] + == dm_sw_4kb_s + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_4kb_s_x + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_64kb_s + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_64kb_s_t + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_64kb_s_x + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_var_s + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_var_s_x) + && mode_lib->vba.SourceScan[k] == dm_horz)) { + locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k]; + } else { + locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k] + / 2.0; + } + locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k]; + } else { + if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) { + locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k]; + locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k]; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8 + && mode_lib->vba.SourceScan[k] == dm_horz) { + locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k] + / 2.0; + locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k]; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10 + && mode_lib->vba.SourceScan[k] == dm_horz) { + locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k] + / 2.0; + locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k]; + } else { + locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k]; + locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k]; + } + } + if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) { + mode_lib->vba.MaximumSwathWidthSupport = 8192.0; + } else { + mode_lib->vba.MaximumSwathWidthSupport = 5120.0; + } + mode_lib->vba.MaximumSwathWidthInDETBuffer = + dml_min( + mode_lib->vba.MaximumSwathWidthSupport, + mode_lib->vba.DETBufferSizeInKByte[0] * 1024.0 / 2.0 + / (locals->BytePerPixelInDETY[k] + * locals->MinSwathHeightY[k] + + locals->BytePerPixelInDETC[k] + / 2.0 + * locals->MinSwathHeightC[k])); + if (locals->BytePerPixelInDETC[k] == 0.0) { + mode_lib->vba.MaximumSwathWidthInLineBuffer = + mode_lib->vba.LineBufferSize + * dml_max(mode_lib->vba.HRatio[k], 1.0) + / mode_lib->vba.LBBitPerPixel[k] + / (mode_lib->vba.vtaps[k] + + dml_max( + dml_ceil( + mode_lib->vba.VRatio[k], + 1.0) + - 2, + 0.0)); + } else { + mode_lib->vba.MaximumSwathWidthInLineBuffer = + dml_min( + mode_lib->vba.LineBufferSize + * dml_max( + mode_lib->vba.HRatio[k], + 1.0) + / mode_lib->vba.LBBitPerPixel[k] + / (mode_lib->vba.vtaps[k] + + dml_max( + dml_ceil( + mode_lib->vba.VRatio[k], + 1.0) + - 2, + 0.0)), + 2.0 * mode_lib->vba.LineBufferSize + * dml_max( + mode_lib->vba.HRatio[k] + / 2.0, + 1.0) + / mode_lib->vba.LBBitPerPixel[k] + / (mode_lib->vba.VTAPsChroma[k] + + dml_max( + dml_ceil( + mode_lib->vba.VRatio[k] + / 2.0, + 1.0) + - 2, + 0.0))); + } + locals->MaximumSwathWidth[k] = dml_min( + mode_lib->vba.MaximumSwathWidthInDETBuffer, + mode_lib->vba.MaximumSwathWidthInLineBuffer); + } + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { + for (j = 0; j < 2; j++) { + mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown( + mode_lib->vba.MaxDispclk[i], + mode_lib->vba.DISPCLKDPPCLKVCOSpeed); + mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown( + mode_lib->vba.MaxDppclk[i], + mode_lib->vba.DISPCLKDPPCLKVCOSpeed); + locals->RequiredDISPCLK[i][j] = 0.0; + locals->DISPCLK_DPPCLK_Support[i][j] = true; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine = + mode_lib->vba.PixelClock[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) + * (1.0 + mode_lib->vba.DISPCLKRampingMargin / 100.0); + if (mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine >= mode_lib->vba.MaxDispclk[i] + && i == mode_lib->vba.soc.num_states) + mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine = mode_lib->vba.PixelClock[k] + * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); + + mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine = mode_lib->vba.PixelClock[k] / 2 + * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * (1 + mode_lib->vba.DISPCLKRampingMargin / 100.0); + if (mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine >= mode_lib->vba.MaxDispclk[i] + && i == mode_lib->vba.soc.num_states) + mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine = mode_lib->vba.PixelClock[k] / 2 + * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); + + locals->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled; + mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine; + if (mode_lib->vba.ODMCapability) { + if (locals->PlaneRequiredDISPCLKWithoutODMCombine > mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity) { + locals->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; + mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine; + } else if (locals->HActive[k] > DCN20_MAX_420_IMAGE_WIDTH && locals->OutputFormat[k] == dm_420) { + locals->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; + mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine; + } + } + + if (locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) <= mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity + && locals->SwathWidthYSingleDPP[k] <= locals->MaximumSwathWidth[k] + && locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) { + locals->NoOfDPP[i][j][k] = 1; + locals->RequiredDPPCLK[i][j][k] = + locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); + } else { + locals->NoOfDPP[i][j][k] = 2; + locals->RequiredDPPCLK[i][j][k] = + locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0; + } + locals->RequiredDISPCLK[i][j] = dml_max( + locals->RequiredDISPCLK[i][j], + mode_lib->vba.PlaneRequiredDISPCLK); + if ((locals->MinDPPCLKUsingSingleDPP[k] / locals->NoOfDPP[i][j][k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) + > mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity) + || (mode_lib->vba.PlaneRequiredDISPCLK > mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity)) { + locals->DISPCLK_DPPCLK_Support[i][j] = false; + } + } + locals->TotalNumberOfActiveDPP[i][j] = 0.0; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) + locals->TotalNumberOfActiveDPP[i][j] = locals->TotalNumberOfActiveDPP[i][j] + locals->NoOfDPP[i][j][k]; + if (j == 1) { + while (locals->TotalNumberOfActiveDPP[i][j] < mode_lib->vba.MaxNumDPP + && locals->TotalNumberOfActiveDPP[i][j] < 2 * mode_lib->vba.NumberOfActivePlanes) { + double BWOfNonSplitPlaneOfMaximumBandwidth; + unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth; + + BWOfNonSplitPlaneOfMaximumBandwidth = 0; + NumberOfNonSplitPlaneOfMaximumBandwidth = 0; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (locals->ReadBandwidth[k] > BWOfNonSplitPlaneOfMaximumBandwidth && locals->NoOfDPP[i][j][k] == 1) { + BWOfNonSplitPlaneOfMaximumBandwidth = locals->ReadBandwidth[k]; + NumberOfNonSplitPlaneOfMaximumBandwidth = k; + } + } + locals->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2; + locals->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = + locals->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth] + * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2; + locals->TotalNumberOfActiveDPP[i][j] = locals->TotalNumberOfActiveDPP[i][j] + 1; + } + } + if (locals->TotalNumberOfActiveDPP[i][j] > mode_lib->vba.MaxNumDPP) { + locals->RequiredDISPCLK[i][j] = 0.0; + locals->DISPCLK_DPPCLK_Support[i][j] = true; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + locals->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled; + if (locals->SwathWidthYSingleDPP[k] <= locals->MaximumSwathWidth[k]) { + locals->NoOfDPP[i][j][k] = 1; + locals->RequiredDPPCLK[i][j][k] = locals->MinDPPCLKUsingSingleDPP[k] + * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); + } else { + locals->NoOfDPP[i][j][k] = 2; + locals->RequiredDPPCLK[i][j][k] = locals->MinDPPCLKUsingSingleDPP[k] + * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0; + } + if (i != mode_lib->vba.soc.num_states) { + mode_lib->vba.PlaneRequiredDISPCLK = + mode_lib->vba.PixelClock[k] + * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) + * (1.0 + mode_lib->vba.DISPCLKRampingMargin / 100.0); + } else { + mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PixelClock[k] + * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); + } + locals->RequiredDISPCLK[i][j] = dml_max( + locals->RequiredDISPCLK[i][j], + mode_lib->vba.PlaneRequiredDISPCLK); + if (locals->MinDPPCLKUsingSingleDPP[k] / locals->NoOfDPP[i][j][k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) + > mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity + || mode_lib->vba.PlaneRequiredDISPCLK > mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity) + locals->DISPCLK_DPPCLK_Support[i][j] = false; + } + locals->TotalNumberOfActiveDPP[i][j] = 0.0; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) + locals->TotalNumberOfActiveDPP[i][j] = locals->TotalNumberOfActiveDPP[i][j] + locals->NoOfDPP[i][j][k]; + } + locals->RequiredDISPCLK[i][j] = dml_max( + locals->RequiredDISPCLK[i][j], + mode_lib->vba.WritebackRequiredDISPCLK); + if (mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity + < mode_lib->vba.WritebackRequiredDISPCLK) { + locals->DISPCLK_DPPCLK_Support[i][j] = false; + } + } + } + /*Viewport Size Check*/ + + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { + locals->ViewportSizeSupport[i][0] = true; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { + if (dml_min(locals->SwathWidthYSingleDPP[k], dml_round(mode_lib->vba.HActive[k] / 2.0 * mode_lib->vba.HRatio[k])) + > locals->MaximumSwathWidth[k]) { + locals->ViewportSizeSupport[i][0] = false; + } + } else { + if (locals->SwathWidthYSingleDPP[k] / 2.0 > locals->MaximumSwathWidth[k]) { + locals->ViewportSizeSupport[i][0] = false; + } + } + } + } + /*Total Available Pipes Support Check*/ + + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { + for (j = 0; j < 2; j++) { + if (locals->TotalNumberOfActiveDPP[i][j] <= mode_lib->vba.MaxNumDPP) + locals->TotalAvailablePipesSupport[i][j] = true; + else + locals->TotalAvailablePipesSupport[i][j] = false; + } + } + /*Total Available OTG Support Check*/ + + mode_lib->vba.TotalNumberOfActiveOTG = 0.0; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.BlendingAndTiming[k] == k) { + mode_lib->vba.TotalNumberOfActiveOTG = mode_lib->vba.TotalNumberOfActiveOTG + + 1.0; + } + } + if (mode_lib->vba.TotalNumberOfActiveOTG <= mode_lib->vba.MaxNumOTG) { + mode_lib->vba.NumberOfOTGSupport = true; + } else { + mode_lib->vba.NumberOfOTGSupport = false; + } + /*Display IO and DSC Support Check*/ + + mode_lib->vba.NonsupportedDSCInputBPC = false; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (!(mode_lib->vba.DSCInputBitPerComponent[k] == 12.0 + || mode_lib->vba.DSCInputBitPerComponent[k] == 10.0 + || mode_lib->vba.DSCInputBitPerComponent[k] == 8.0)) { + mode_lib->vba.NonsupportedDSCInputBPC = true; + } + } + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + locals->RequiresDSC[i][k] = 0; + locals->RequiresFEC[i][k] = 0; + if (mode_lib->vba.BlendingAndTiming[k] == k) { + if (mode_lib->vba.Output[k] == dm_hdmi) { + locals->RequiresDSC[i][k] = 0; + locals->RequiresFEC[i][k] = 0; + locals->OutputBppPerState[i][k] = TruncToValidBPP( + dml_min(600.0, mode_lib->vba.PHYCLKPerState[i]) / mode_lib->vba.PixelClockBackEnd[k] * 24, + false, + mode_lib->vba.Output[k], + mode_lib->vba.OutputFormat[k], + mode_lib->vba.DSCInputBitPerComponent[k]); + } else if (mode_lib->vba.Output[k] == dm_dp + || mode_lib->vba.Output[k] == dm_edp) { + if (mode_lib->vba.Output[k] == dm_edp) { + mode_lib->vba.EffectiveFECOverhead = 0.0; + } else { + mode_lib->vba.EffectiveFECOverhead = + mode_lib->vba.FECOverhead; + } + if (mode_lib->vba.PHYCLKPerState[i] >= 270.0) { + mode_lib->vba.Outbpp = TruncToValidBPP( + (1.0 - mode_lib->vba.Downspreading / 100.0) * 270.0 + * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0, + false, + mode_lib->vba.Output[k], + mode_lib->vba.OutputFormat[k], + mode_lib->vba.DSCInputBitPerComponent[k]); + mode_lib->vba.OutbppDSC = TruncToValidBPP( + (1.0 - mode_lib->vba.Downspreading / 100.0) * (1.0 - mode_lib->vba.EffectiveFECOverhead / 100.0) * 270.0 + * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0, + true, + mode_lib->vba.Output[k], + mode_lib->vba.OutputFormat[k], + mode_lib->vba.DSCInputBitPerComponent[k]); + if (mode_lib->vba.DSCEnabled[k] == true) { + locals->RequiresDSC[i][k] = true; + if (mode_lib->vba.Output[k] == dm_dp) { + locals->RequiresFEC[i][k] = true; + } else { + locals->RequiresFEC[i][k] = false; + } + mode_lib->vba.Outbpp = mode_lib->vba.OutbppDSC; + } else { + locals->RequiresDSC[i][k] = false; + locals->RequiresFEC[i][k] = false; + } + locals->OutputBppPerState[i][k] = mode_lib->vba.Outbpp; + } + if (mode_lib->vba.Outbpp == BPP_INVALID && mode_lib->vba.PHYCLKPerState[i] >= 540.0) { + mode_lib->vba.Outbpp = TruncToValidBPP( + (1.0 - mode_lib->vba.Downspreading / 100.0) * 540.0 + * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0, + false, + mode_lib->vba.Output[k], + mode_lib->vba.OutputFormat[k], + mode_lib->vba.DSCInputBitPerComponent[k]); + mode_lib->vba.OutbppDSC = TruncToValidBPP( + (1.0 - mode_lib->vba.Downspreading / 100.0) * (1.0 - mode_lib->vba.EffectiveFECOverhead / 100.0) * 540.0 + * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0, + true, + mode_lib->vba.Output[k], + mode_lib->vba.OutputFormat[k], + mode_lib->vba.DSCInputBitPerComponent[k]); + if (mode_lib->vba.DSCEnabled[k] == true) { + locals->RequiresDSC[i][k] = true; + if (mode_lib->vba.Output[k] == dm_dp) { + locals->RequiresFEC[i][k] = true; + } else { + locals->RequiresFEC[i][k] = false; + } + mode_lib->vba.Outbpp = mode_lib->vba.OutbppDSC; + } else { + locals->RequiresDSC[i][k] = false; + locals->RequiresFEC[i][k] = false; + } + locals->OutputBppPerState[i][k] = mode_lib->vba.Outbpp; + } + if (mode_lib->vba.Outbpp == BPP_INVALID + && mode_lib->vba.PHYCLKPerState[i] + >= 810.0) { + mode_lib->vba.Outbpp = TruncToValidBPP( + (1.0 - mode_lib->vba.Downspreading / 100.0) * 810.0 + * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0, + false, + mode_lib->vba.Output[k], + mode_lib->vba.OutputFormat[k], + mode_lib->vba.DSCInputBitPerComponent[k]); + mode_lib->vba.OutbppDSC = TruncToValidBPP( + (1.0 - mode_lib->vba.Downspreading / 100.0) * (1.0 - mode_lib->vba.EffectiveFECOverhead / 100.0) * 810.0 + * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0, + true, + mode_lib->vba.Output[k], + mode_lib->vba.OutputFormat[k], + mode_lib->vba.DSCInputBitPerComponent[k]); + if (mode_lib->vba.DSCEnabled[k] == true || mode_lib->vba.Outbpp == BPP_INVALID) { + locals->RequiresDSC[i][k] = true; + if (mode_lib->vba.Output[k] == dm_dp) { + locals->RequiresFEC[i][k] = true; + } else { + locals->RequiresFEC[i][k] = false; + } + mode_lib->vba.Outbpp = mode_lib->vba.OutbppDSC; + } else { + locals->RequiresDSC[i][k] = false; + locals->RequiresFEC[i][k] = false; + } + locals->OutputBppPerState[i][k] = + mode_lib->vba.Outbpp; + } + } + } else { + locals->OutputBppPerState[i][k] = BPP_BLENDED_PIPE; + } + } + } + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { + locals->DIOSupport[i] = true; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (!mode_lib->vba.skip_dio_check[k] + && (locals->OutputBppPerState[i][k] == BPP_INVALID + || (mode_lib->vba.OutputFormat[k] == dm_420 + && mode_lib->vba.Interlace[k] == true + && mode_lib->vba.ProgressiveToInterlaceUnitInOPP == true))) { + locals->DIOSupport[i] = false; + } + } + } + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + locals->DSCCLKRequiredMoreThanSupported[i] = false; + if (mode_lib->vba.BlendingAndTiming[k] == k) { + if ((mode_lib->vba.Output[k] == dm_dp + || mode_lib->vba.Output[k] == dm_edp)) { + if (mode_lib->vba.OutputFormat[k] == dm_420 + || mode_lib->vba.OutputFormat[k] + == dm_n422) { + mode_lib->vba.DSCFormatFactor = 2; + } else { + mode_lib->vba.DSCFormatFactor = 1; + } + if (locals->RequiresDSC[i][k] == true) { + if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { + if (mode_lib->vba.PixelClockBackEnd[k] / 6.0 / mode_lib->vba.DSCFormatFactor + > (1.0 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * mode_lib->vba.MaxDSCCLK[i]) { + locals->DSCCLKRequiredMoreThanSupported[i] = + true; + } + } else { + if (mode_lib->vba.PixelClockBackEnd[k] / 3.0 / mode_lib->vba.DSCFormatFactor + > (1.0 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * mode_lib->vba.MaxDSCCLK[i]) { + locals->DSCCLKRequiredMoreThanSupported[i] = + true; + } + } + } + } + } + } + } + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { + locals->NotEnoughDSCUnits[i] = false; + mode_lib->vba.TotalDSCUnitsRequired = 0.0; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (locals->RequiresDSC[i][k] == true) { + if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { + mode_lib->vba.TotalDSCUnitsRequired = + mode_lib->vba.TotalDSCUnitsRequired + 2.0; + } else { + mode_lib->vba.TotalDSCUnitsRequired = + mode_lib->vba.TotalDSCUnitsRequired + 1.0; + } + } + } + if (mode_lib->vba.TotalDSCUnitsRequired > mode_lib->vba.NumberOfDSC) { + locals->NotEnoughDSCUnits[i] = true; + } + } + /*DSC Delay per state*/ + + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.BlendingAndTiming[k] != k) { + mode_lib->vba.slices = 0; + } else if (locals->RequiresDSC[i][k] == 0 + || locals->RequiresDSC[i][k] == false) { + mode_lib->vba.slices = 0; + } else if (mode_lib->vba.PixelClockBackEnd[k] > 3200.0) { + mode_lib->vba.slices = dml_ceil( + mode_lib->vba.PixelClockBackEnd[k] / 400.0, + 4.0); + } else if (mode_lib->vba.PixelClockBackEnd[k] > 1360.0) { + mode_lib->vba.slices = 8.0; + } else if (mode_lib->vba.PixelClockBackEnd[k] > 680.0) { + mode_lib->vba.slices = 4.0; + } else if (mode_lib->vba.PixelClockBackEnd[k] > 340.0) { + mode_lib->vba.slices = 2.0; + } else { + mode_lib->vba.slices = 1.0; + } + if (locals->OutputBppPerState[i][k] == BPP_BLENDED_PIPE + || locals->OutputBppPerState[i][k] == BPP_INVALID) { + mode_lib->vba.bpp = 0.0; + } else { + mode_lib->vba.bpp = locals->OutputBppPerState[i][k]; + } + if (locals->RequiresDSC[i][k] == true && mode_lib->vba.bpp != 0.0) { + if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) { + locals->DSCDelayPerState[i][k] = + dscceComputeDelay( + mode_lib->vba.DSCInputBitPerComponent[k], + mode_lib->vba.bpp, + dml_ceil( + mode_lib->vba.HActive[k] + / mode_lib->vba.slices, + 1.0), + mode_lib->vba.slices, + mode_lib->vba.OutputFormat[k]) + + dscComputeDelay( + mode_lib->vba.OutputFormat[k]); + } else { + locals->DSCDelayPerState[i][k] = + 2.0 * (dscceComputeDelay( + mode_lib->vba.DSCInputBitPerComponent[k], + mode_lib->vba.bpp, + dml_ceil(mode_lib->vba.HActive[k] / mode_lib->vba.slices, 1.0), + mode_lib->vba.slices / 2, + mode_lib->vba.OutputFormat[k]) + + dscComputeDelay(mode_lib->vba.OutputFormat[k])); + } + locals->DSCDelayPerState[i][k] = + locals->DSCDelayPerState[i][k] * mode_lib->vba.PixelClock[k] / mode_lib->vba.PixelClockBackEnd[k]; + } else { + locals->DSCDelayPerState[i][k] = 0.0; + } + } + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + for (m = 0; m <= mode_lib->vba.NumberOfActivePlanes - 1; m++) { + for (j = 0; j <= mode_lib->vba.NumberOfActivePlanes - 1; j++) { + if (mode_lib->vba.BlendingAndTiming[k] == m && locals->RequiresDSC[i][m] == true) + locals->DSCDelayPerState[i][k] = locals->DSCDelayPerState[i][m]; + } + } + } + } + + //Prefetch Check + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { + for (j = 0; j < 2; j++) { + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) + locals->SwathWidthYPerState[i][j][k] = dml_min(locals->SwathWidthYSingleDPP[k], dml_round(locals->HActive[k] / 2 * locals->HRatio[k])); + else + locals->SwathWidthYPerState[i][j][k] = locals->SwathWidthYSingleDPP[k] / locals->NoOfDPP[i][j][k]; + locals->SwathWidthGranularityY = 256 / dml_ceil(locals->BytePerPixelInDETY[k], 1) / locals->MaxSwathHeightY[k]; + locals->RoundedUpMaxSwathSizeBytesY = (dml_ceil(locals->SwathWidthYPerState[i][j][k] - 1, locals->SwathWidthGranularityY) + + locals->SwathWidthGranularityY) * locals->BytePerPixelInDETY[k] * locals->MaxSwathHeightY[k]; + if (locals->SourcePixelFormat[k] == dm_420_10) { + locals->RoundedUpMaxSwathSizeBytesY = dml_ceil(locals->RoundedUpMaxSwathSizeBytesY, 256) + 256; + } + if (locals->MaxSwathHeightC[k] > 0) { + locals->SwathWidthGranularityC = 256 / dml_ceil(locals->BytePerPixelInDETC[k], 2) / locals->MaxSwathHeightC[k]; + + locals->RoundedUpMaxSwathSizeBytesC = (dml_ceil(locals->SwathWidthYPerState[i][j][k] / 2 - 1, locals->SwathWidthGranularityC) + + locals->SwathWidthGranularityC) * locals->BytePerPixelInDETC[k] * locals->MaxSwathHeightC[k]; + } + if (locals->SourcePixelFormat[k] == dm_420_10) { + locals->RoundedUpMaxSwathSizeBytesC = dml_ceil(locals->RoundedUpMaxSwathSizeBytesC, 256) + 256; + } else { + locals->RoundedUpMaxSwathSizeBytesC = 0; + } + + if (locals->RoundedUpMaxSwathSizeBytesY + locals->RoundedUpMaxSwathSizeBytesC <= locals->DETBufferSizeInKByte[0] * 1024 / 2) { + locals->SwathHeightYPerState[i][j][k] = locals->MaxSwathHeightY[k]; + locals->SwathHeightCPerState[i][j][k] = locals->MaxSwathHeightC[k]; + } else { + locals->SwathHeightYPerState[i][j][k] = locals->MinSwathHeightY[k]; + locals->SwathHeightCPerState[i][j][k] = locals->MinSwathHeightC[k]; + } + + if (locals->BytePerPixelInDETC[k] == 0) { + locals->LinesInDETLuma = locals->DETBufferSizeInKByte[0] * 1024 / locals->BytePerPixelInDETY[k] / locals->SwathWidthYPerState[i][j][k]; + locals->LinesInDETChroma = 0; + } else if (locals->SwathHeightYPerState[i][j][k] <= locals->SwathHeightCPerState[i][j][k]) { + locals->LinesInDETLuma = locals->DETBufferSizeInKByte[0] * 1024 / 2 / locals->BytePerPixelInDETY[k] / + locals->SwathWidthYPerState[i][j][k]; + locals->LinesInDETChroma = locals->DETBufferSizeInKByte[0] * 1024 / 2 / locals->BytePerPixelInDETC[k] / (locals->SwathWidthYPerState[i][j][k] / 2); + } else { + locals->LinesInDETLuma = locals->DETBufferSizeInKByte[0] * 1024 * 2 / 3 / locals->BytePerPixelInDETY[k] / locals->SwathWidthYPerState[i][j][k]; + locals->LinesInDETChroma = locals->DETBufferSizeInKByte[0] * 1024 / 3 / locals->BytePerPixelInDETY[k] / (locals->SwathWidthYPerState[i][j][k] / 2); + } + + locals->EffectiveLBLatencyHidingSourceLinesLuma = dml_min(locals->MaxLineBufferLines, + dml_floor(locals->LineBufferSize / locals->LBBitPerPixel[k] / (locals->SwathWidthYPerState[i][j][k] + / dml_max(locals->HRatio[k], 1)), 1)) - (locals->vtaps[k] - 1); + + locals->EffectiveLBLatencyHidingSourceLinesChroma = dml_min(locals->MaxLineBufferLines, + dml_floor(locals->LineBufferSize / locals->LBBitPerPixel[k] + / (locals->SwathWidthYPerState[i][j][k] / 2 + / dml_max(locals->HRatio[k] / 2, 1)), 1)) - (locals->VTAPsChroma[k] - 1); + + locals->EffectiveDETLBLinesLuma = dml_floor(locals->LinesInDETLuma + dml_min( + locals->LinesInDETLuma * locals->RequiredDISPCLK[i][j] * locals->BytePerPixelInDETY[k] * + locals->PSCL_FACTOR[k] / locals->ReturnBWPerState[i][0], + locals->EffectiveLBLatencyHidingSourceLinesLuma), + locals->SwathHeightYPerState[i][j][k]); + + locals->EffectiveDETLBLinesChroma = dml_floor(locals->LinesInDETChroma + dml_min( + locals->LinesInDETChroma * locals->RequiredDISPCLK[i][j] * locals->BytePerPixelInDETC[k] * + locals->PSCL_FACTOR_CHROMA[k] / locals->ReturnBWPerState[i][0], + locals->EffectiveLBLatencyHidingSourceLinesChroma), + locals->SwathHeightCPerState[i][j][k]); + + if (locals->BytePerPixelInDETC[k] == 0) { + locals->UrgentLatencySupportUsPerState[i][j][k] = locals->EffectiveDETLBLinesLuma * (locals->HTotal[k] / locals->PixelClock[k]) + / locals->VRatio[k] - locals->EffectiveDETLBLinesLuma * locals->SwathWidthYPerState[i][j][k] * + dml_ceil(locals->BytePerPixelInDETY[k], 1) / (locals->ReturnBWPerState[i][0] / locals->NoOfDPP[i][j][k]); + } else { + locals->UrgentLatencySupportUsPerState[i][j][k] = dml_min( + locals->EffectiveDETLBLinesLuma * (locals->HTotal[k] / locals->PixelClock[k]) + / locals->VRatio[k] - locals->EffectiveDETLBLinesLuma * locals->SwathWidthYPerState[i][j][k] * + dml_ceil(locals->BytePerPixelInDETY[k], 1) / (locals->ReturnBWPerState[i][0] / locals->NoOfDPP[i][j][k]), + locals->EffectiveDETLBLinesChroma * (locals->HTotal[k] / locals->PixelClock[k]) / (locals->VRatio[k] / 2) - + locals->EffectiveDETLBLinesChroma * locals->SwathWidthYPerState[i][j][k] / 2 * + dml_ceil(locals->BytePerPixelInDETC[k], 2) / (locals->ReturnBWPerState[i][0] / locals->NoOfDPP[i][j][k])); + } + } + } + } + + for (i = 0; i <= locals->soc.num_states; i++) { + for (j = 0; j < 2; j++) { + locals->UrgentLatencySupport[i][j] = true; + for (k = 0; k < locals->NumberOfActivePlanes; k++) { + if (locals->UrgentLatencySupportUsPerState[i][j][k] < locals->UrgentLatency) + locals->UrgentLatencySupport[i][j] = false; + } + } + } + + + /*Prefetch Check*/ + for (i = 0; i <= locals->soc.num_states; i++) { + for (j = 0; j < 2; j++) { + locals->TotalNumberOfDCCActiveDPP[i][j] = 0; + for (k = 0; k < locals->NumberOfActivePlanes; k++) { + if (locals->DCCEnable[k] == true) { + locals->TotalNumberOfDCCActiveDPP[i][j] = + locals->TotalNumberOfDCCActiveDPP[i][j] + locals->NoOfDPP[i][j][k]; + } + } + } + } + + CalculateMinAndMaxPrefetchMode(locals->AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, &locals->MinPrefetchMode, &locals->MaxPrefetchMode); + + for (i = 0; i <= locals->soc.num_states; i++) { + for (j = 0; j < 2; j++) { + for (k = 0; k < locals->NumberOfActivePlanes; k++) { + locals->NoOfDPPThisState[k] = locals->NoOfDPP[i][j][k]; + locals->RequiredDPPCLKThisState[k] = locals->RequiredDPPCLK[i][j][k]; + locals->SwathHeightYThisState[k] = locals->SwathHeightYPerState[i][j][k]; + locals->SwathHeightCThisState[k] = locals->SwathHeightCPerState[i][j][k]; + locals->SwathWidthYThisState[k] = locals->SwathWidthYPerState[i][j][k]; + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] = dml_max( + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], + mode_lib->vba.PixelClock[k] / 16.0); + if (mode_lib->vba.BytePerPixelInDETC[k] == 0.0) { + if (mode_lib->vba.VRatio[k] <= 1.0) { + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] = + dml_max( + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], + 1.1 + * dml_ceil( + mode_lib->vba.BytePerPixelInDETY[k], + 1.0) + / 64.0 + * mode_lib->vba.HRatio[k] + * mode_lib->vba.PixelClock[k] + / mode_lib->vba.NoOfDPP[i][j][k]); + } else { + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] = + dml_max( + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], + 1.1 + * dml_ceil( + mode_lib->vba.BytePerPixelInDETY[k], + 1.0) + / 64.0 + * mode_lib->vba.PSCL_FACTOR[k] + * mode_lib->vba.RequiredDPPCLK[i][j][k]); + } + } else { + if (mode_lib->vba.VRatio[k] <= 1.0) { + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] = + dml_max( + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], + 1.1 + * dml_ceil( + mode_lib->vba.BytePerPixelInDETY[k], + 1.0) + / 32.0 + * mode_lib->vba.HRatio[k] + * mode_lib->vba.PixelClock[k] + / mode_lib->vba.NoOfDPP[i][j][k]); + } else { + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] = + dml_max( + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], + 1.1 + * dml_ceil( + mode_lib->vba.BytePerPixelInDETY[k], + 1.0) + / 32.0 + * mode_lib->vba.PSCL_FACTOR[k] + * mode_lib->vba.RequiredDPPCLK[i][j][k]); + } + if (mode_lib->vba.VRatio[k] / 2.0 <= 1.0) { + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] = + dml_max( + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], + 1.1 + * dml_ceil( + mode_lib->vba.BytePerPixelInDETC[k], + 2.0) + / 32.0 + * mode_lib->vba.HRatio[k] + / 2.0 + * mode_lib->vba.PixelClock[k] + / mode_lib->vba.NoOfDPP[i][j][k]); + } else { + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] = + dml_max( + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], + 1.1 + * dml_ceil( + mode_lib->vba.BytePerPixelInDETC[k], + 2.0) + / 32.0 + * mode_lib->vba.PSCL_FACTOR_CHROMA[k] + * mode_lib->vba.RequiredDPPCLK[i][j][k]); + } + } + } + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + mode_lib->vba.PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes( + mode_lib, + mode_lib->vba.DCCEnable[k], + mode_lib->vba.Read256BlockHeightY[k], + mode_lib->vba.Read256BlockWidthY[k], + mode_lib->vba.SourcePixelFormat[k], + mode_lib->vba.SurfaceTiling[k], + dml_ceil(mode_lib->vba.BytePerPixelInDETY[k], 1.0), + mode_lib->vba.SourceScan[k], + mode_lib->vba.ViewportWidth[k], + mode_lib->vba.ViewportHeight[k], + mode_lib->vba.SwathWidthYPerState[i][j][k], + mode_lib->vba.GPUVMEnable, + mode_lib->vba.VMMPageSize, + mode_lib->vba.PTEBufferSizeInRequestsLuma, + mode_lib->vba.PDEProcessingBufIn64KBReqs, + mode_lib->vba.PitchY[k], + mode_lib->vba.DCCMetaPitchY[k], + &mode_lib->vba.MacroTileWidthY[k], + &mode_lib->vba.MetaRowBytesY, + &mode_lib->vba.DPTEBytesPerRowY, + &mode_lib->vba.PTEBufferSizeNotExceededY[i][j][k], + &mode_lib->vba.dpte_row_height[k], + &mode_lib->vba.meta_row_height[k]); + mode_lib->vba.PrefetchLinesY[0][0][k] = CalculatePrefetchSourceLines( + mode_lib, + mode_lib->vba.VRatio[k], + mode_lib->vba.vtaps[k], + mode_lib->vba.Interlace[k], + mode_lib->vba.ProgressiveToInterlaceUnitInOPP, + mode_lib->vba.SwathHeightYPerState[i][j][k], + mode_lib->vba.ViewportYStartY[k], + &mode_lib->vba.PrefillY[k], + &mode_lib->vba.MaxNumSwY[k]); + if ((mode_lib->vba.SourcePixelFormat[k] != dm_444_64 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_32 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_16 + && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16 + && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8)) { + mode_lib->vba.PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes( + mode_lib, + mode_lib->vba.DCCEnable[k], + mode_lib->vba.Read256BlockHeightY[k], + mode_lib->vba.Read256BlockWidthY[k], + mode_lib->vba.SourcePixelFormat[k], + mode_lib->vba.SurfaceTiling[k], + dml_ceil(mode_lib->vba.BytePerPixelInDETC[k], 2.0), + mode_lib->vba.SourceScan[k], + mode_lib->vba.ViewportWidth[k] / 2.0, + mode_lib->vba.ViewportHeight[k] / 2.0, + mode_lib->vba.SwathWidthYPerState[i][j][k] / 2.0, + mode_lib->vba.GPUVMEnable, + mode_lib->vba.VMMPageSize, + mode_lib->vba.PTEBufferSizeInRequestsLuma, + mode_lib->vba.PDEProcessingBufIn64KBReqs, + mode_lib->vba.PitchC[k], + 0.0, + &mode_lib->vba.MacroTileWidthC[k], + &mode_lib->vba.MetaRowBytesC, + &mode_lib->vba.DPTEBytesPerRowC, + &mode_lib->vba.PTEBufferSizeNotExceededC[i][j][k], + &mode_lib->vba.dpte_row_height_chroma[k], + &mode_lib->vba.meta_row_height_chroma[k]); + mode_lib->vba.PrefetchLinesC[0][0][k] = CalculatePrefetchSourceLines( + mode_lib, + mode_lib->vba.VRatio[k] / 2.0, + mode_lib->vba.VTAPsChroma[k], + mode_lib->vba.Interlace[k], + mode_lib->vba.ProgressiveToInterlaceUnitInOPP, + mode_lib->vba.SwathHeightCPerState[i][j][k], + mode_lib->vba.ViewportYStartC[k], + &mode_lib->vba.PrefillC[k], + &mode_lib->vba.MaxNumSwC[k]); + } else { + mode_lib->vba.PDEAndMetaPTEBytesPerFrameC = 0.0; + mode_lib->vba.MetaRowBytesC = 0.0; + mode_lib->vba.DPTEBytesPerRowC = 0.0; + locals->PrefetchLinesC[0][0][k] = 0.0; + locals->PTEBufferSizeNotExceededC[i][j][k] = true; + locals->PTEBufferSizeInRequestsForLuma = mode_lib->vba.PTEBufferSizeInRequestsLuma + mode_lib->vba.PTEBufferSizeInRequestsChroma; + } + locals->PDEAndMetaPTEBytesPerFrame[0][0][k] = + mode_lib->vba.PDEAndMetaPTEBytesPerFrameY + mode_lib->vba.PDEAndMetaPTEBytesPerFrameC; + locals->MetaRowBytes[0][0][k] = mode_lib->vba.MetaRowBytesY + mode_lib->vba.MetaRowBytesC; + locals->DPTEBytesPerRow[0][0][k] = mode_lib->vba.DPTEBytesPerRowY + mode_lib->vba.DPTEBytesPerRowC; + + CalculateActiveRowBandwidth( + mode_lib->vba.GPUVMEnable, + mode_lib->vba.SourcePixelFormat[k], + mode_lib->vba.VRatio[k], + mode_lib->vba.DCCEnable[k], + mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k], + mode_lib->vba.MetaRowBytesY, + mode_lib->vba.MetaRowBytesC, + mode_lib->vba.meta_row_height[k], + mode_lib->vba.meta_row_height_chroma[k], + mode_lib->vba.DPTEBytesPerRowY, + mode_lib->vba.DPTEBytesPerRowC, + mode_lib->vba.dpte_row_height[k], + mode_lib->vba.dpte_row_height_chroma[k], + &mode_lib->vba.meta_row_bw[k], + &mode_lib->vba.dpte_row_bw[k], + &mode_lib->vba.qual_row_bw[k]); + } + mode_lib->vba.ExtraLatency = + mode_lib->vba.UrgentRoundTripAndOutOfOrderLatencyPerState[i] + + (mode_lib->vba.TotalNumberOfActiveDPP[i][j] + * mode_lib->vba.PixelChunkSizeInKByte + + mode_lib->vba.TotalNumberOfDCCActiveDPP[i][j] + * mode_lib->vba.MetaChunkSize) + * 1024.0 + / mode_lib->vba.ReturnBWPerState[i][0]; + if (mode_lib->vba.GPUVMEnable == true) { + mode_lib->vba.ExtraLatency = mode_lib->vba.ExtraLatency + + mode_lib->vba.TotalNumberOfActiveDPP[i][j] + * mode_lib->vba.PTEGroupSize + / mode_lib->vba.ReturnBWPerState[i][0]; + } + mode_lib->vba.TimeCalc = 24.0 / mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0]; + + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.BlendingAndTiming[k] == k) { + if (mode_lib->vba.WritebackEnable[k] == true) { + locals->WritebackDelay[i][k] = mode_lib->vba.WritebackLatency + + CalculateWriteBackDelay( + mode_lib->vba.WritebackPixelFormat[k], + mode_lib->vba.WritebackHRatio[k], + mode_lib->vba.WritebackVRatio[k], + mode_lib->vba.WritebackLumaHTaps[k], + mode_lib->vba.WritebackLumaVTaps[k], + mode_lib->vba.WritebackChromaHTaps[k], + mode_lib->vba.WritebackChromaVTaps[k], + mode_lib->vba.WritebackDestinationWidth[k]) / locals->RequiredDISPCLK[i][j]; + } else { + locals->WritebackDelay[i][k] = 0.0; + } + for (m = 0; m <= mode_lib->vba.NumberOfActivePlanes - 1; m++) { + if (mode_lib->vba.BlendingAndTiming[m] == k + && mode_lib->vba.WritebackEnable[m] + == true) { + locals->WritebackDelay[i][k] = dml_max(locals->WritebackDelay[i][k], + mode_lib->vba.WritebackLatency + CalculateWriteBackDelay( + mode_lib->vba.WritebackPixelFormat[m], + mode_lib->vba.WritebackHRatio[m], + mode_lib->vba.WritebackVRatio[m], + mode_lib->vba.WritebackLumaHTaps[m], + mode_lib->vba.WritebackLumaVTaps[m], + mode_lib->vba.WritebackChromaHTaps[m], + mode_lib->vba.WritebackChromaVTaps[m], + mode_lib->vba.WritebackDestinationWidth[m]) / locals->RequiredDISPCLK[i][j]); + } + } + } + } + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + for (m = 0; m <= mode_lib->vba.NumberOfActivePlanes - 1; m++) { + if (mode_lib->vba.BlendingAndTiming[k] == m) { + locals->WritebackDelay[i][k] = locals->WritebackDelay[i][m]; + } + } + } + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + for (m = 0; m < locals->NumberOfCursors[k]; m++) + locals->cursor_bw[k] = locals->NumberOfCursors[k] * locals->CursorWidth[k][m] * locals->CursorBPP[k][m] + / 8 / (locals->HTotal[k] / locals->PixelClock[k]) * locals->VRatio[k]; + } + + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + locals->MaximumVStartup[0][0][k] = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k] + - dml_max(1.0, dml_ceil(locals->WritebackDelay[i][k] / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]), 1.0)); + } + + mode_lib->vba.NextPrefetchMode = mode_lib->vba.MinPrefetchMode; + do { + mode_lib->vba.PrefetchMode[i][j] = mode_lib->vba.NextPrefetchMode; + mode_lib->vba.NextPrefetchMode = mode_lib->vba.NextPrefetchMode + 1; + + mode_lib->vba.TWait = CalculateTWait( + mode_lib->vba.PrefetchMode[i][j], + mode_lib->vba.DRAMClockChangeLatency, + mode_lib->vba.UrgentLatency, + mode_lib->vba.SREnterPlusExitTime); + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + + if (mode_lib->vba.XFCEnabled[k] == true) { + mode_lib->vba.XFCRemoteSurfaceFlipDelay = + CalculateRemoteSurfaceFlipDelay( + mode_lib, + mode_lib->vba.VRatio[k], + locals->SwathWidthYPerState[i][j][k], + dml_ceil(locals->BytePerPixelInDETY[k], 1.0), + mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k], + mode_lib->vba.XFCTSlvVupdateOffset, + mode_lib->vba.XFCTSlvVupdateWidth, + mode_lib->vba.XFCTSlvVreadyOffset, + mode_lib->vba.XFCXBUFLatencyTolerance, + mode_lib->vba.XFCFillBWOverhead, + mode_lib->vba.XFCSlvChunkSize, + mode_lib->vba.XFCBusTransportTime, + mode_lib->vba.TimeCalc, + mode_lib->vba.TWait, + &mode_lib->vba.SrcActiveDrainRate, + &mode_lib->vba.TInitXFill, + &mode_lib->vba.TslvChk); + } else { + mode_lib->vba.XFCRemoteSurfaceFlipDelay = 0.0; + } + mode_lib->vba.IsErrorResult[i][j][k] = + CalculatePrefetchSchedule( + mode_lib, + mode_lib->vba.RequiredDPPCLK[i][j][k], + mode_lib->vba.RequiredDISPCLK[i][j], + mode_lib->vba.PixelClock[k], + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], + mode_lib->vba.DSCDelayPerState[i][k], + mode_lib->vba.NoOfDPP[i][j][k], + mode_lib->vba.ScalerEnabled[k], + mode_lib->vba.NumberOfCursors[k], + mode_lib->vba.DPPCLKDelaySubtotal, + mode_lib->vba.DPPCLKDelaySCL, + mode_lib->vba.DPPCLKDelaySCLLBOnly, + mode_lib->vba.DPPCLKDelayCNVCFormater, + mode_lib->vba.DPPCLKDelayCNVCCursor, + mode_lib->vba.DISPCLKDelaySubtotal, + mode_lib->vba.SwathWidthYPerState[i][j][k] + / mode_lib->vba.HRatio[k], + mode_lib->vba.OutputFormat[k], + mode_lib->vba.VTotal[k] + - mode_lib->vba.VActive[k], + mode_lib->vba.HTotal[k], + mode_lib->vba.MaxInterDCNTileRepeaters, + mode_lib->vba.MaximumVStartup[0][0][k], + mode_lib->vba.GPUVMMaxPageTableLevels, + mode_lib->vba.GPUVMEnable, + mode_lib->vba.DynamicMetadataEnable[k], + mode_lib->vba.DynamicMetadataLinesBeforeActiveRequired[k], + mode_lib->vba.DynamicMetadataTransmittedBytes[k], + mode_lib->vba.DCCEnable[k], + mode_lib->vba.UrgentLatencyPixelDataOnly, + mode_lib->vba.ExtraLatency, + mode_lib->vba.TimeCalc, + mode_lib->vba.PDEAndMetaPTEBytesPerFrame[0][0][k], + mode_lib->vba.MetaRowBytes[0][0][k], + mode_lib->vba.DPTEBytesPerRow[0][0][k], + mode_lib->vba.PrefetchLinesY[0][0][k], + mode_lib->vba.SwathWidthYPerState[i][j][k], + mode_lib->vba.BytePerPixelInDETY[k], + mode_lib->vba.PrefillY[k], + mode_lib->vba.MaxNumSwY[k], + mode_lib->vba.PrefetchLinesC[0][0][k], + mode_lib->vba.BytePerPixelInDETC[k], + mode_lib->vba.PrefillC[k], + mode_lib->vba.MaxNumSwC[k], + mode_lib->vba.SwathHeightYPerState[i][j][k], + mode_lib->vba.SwathHeightCPerState[i][j][k], + mode_lib->vba.TWait, + mode_lib->vba.XFCEnabled[k], + mode_lib->vba.XFCRemoteSurfaceFlipDelay, + mode_lib->vba.Interlace[k], + mode_lib->vba.ProgressiveToInterlaceUnitInOPP, + mode_lib->vba.DSTXAfterScaler, + mode_lib->vba.DSTYAfterScaler, + &mode_lib->vba.LineTimesForPrefetch[k], + &mode_lib->vba.PrefetchBW[k], + &mode_lib->vba.LinesForMetaPTE[k], + &mode_lib->vba.LinesForMetaAndDPTERow[k], + &mode_lib->vba.VRatioPreY[i][j][k], + &mode_lib->vba.VRatioPreC[i][j][k], + &mode_lib->vba.RequiredPrefetchPixelDataBWLuma[i][j][k], + &mode_lib->vba.VStartupRequiredWhenNotEnoughTimeForDynamicMetadata, + &mode_lib->vba.Tno_bw[k], + &mode_lib->vba.VUpdateOffsetPix[k], + &mode_lib->vba.VUpdateWidthPix[k], + &mode_lib->vba.VReadyOffsetPix[k]); + } + mode_lib->vba.MaximumReadBandwidthWithoutPrefetch = 0.0; + mode_lib->vba.MaximumReadBandwidthWithPrefetch = 0.0; + locals->prefetch_vm_bw_valid = true; + locals->prefetch_row_bw_valid = true; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (locals->PDEAndMetaPTEBytesPerFrame[0][0][k] == 0) + locals->prefetch_vm_bw[k] = 0; + else if (locals->LinesForMetaPTE[k] > 0) + locals->prefetch_vm_bw[k] = locals->PDEAndMetaPTEBytesPerFrame[0][0][k] + / (locals->LinesForMetaPTE[k] * locals->HTotal[k] / locals->PixelClock[k]); + else { + locals->prefetch_vm_bw[k] = 0; + locals->prefetch_vm_bw_valid = false; + } + if (locals->MetaRowBytes[0][0][k] + locals->DPTEBytesPerRow[0][0][k] == 0) + locals->prefetch_row_bw[k] = 0; + else if (locals->LinesForMetaAndDPTERow[k] > 0) + locals->prefetch_row_bw[k] = (locals->MetaRowBytes[0][0][k] + locals->DPTEBytesPerRow[0][0][k]) + / (locals->LinesForMetaAndDPTERow[k] * locals->HTotal[k] / locals->PixelClock[k]); + else { + locals->prefetch_row_bw[k] = 0; + locals->prefetch_row_bw_valid = false; + } + + mode_lib->vba.MaximumReadBandwidthWithoutPrefetch = mode_lib->vba.MaximumReadBandwidthWithPrefetch + + mode_lib->vba.cursor_bw[k] + mode_lib->vba.ReadBandwidth[k] + mode_lib->vba.meta_row_bw[k] + mode_lib->vba.dpte_row_bw[k]; + mode_lib->vba.MaximumReadBandwidthWithPrefetch = + mode_lib->vba.MaximumReadBandwidthWithPrefetch + + mode_lib->vba.cursor_bw[k] + + dml_max3( + mode_lib->vba.prefetch_vm_bw[k], + mode_lib->vba.prefetch_row_bw[k], + dml_max(mode_lib->vba.ReadBandwidth[k], + mode_lib->vba.RequiredPrefetchPixelDataBWLuma[i][j][k]) + + mode_lib->vba.meta_row_bw[k] + mode_lib->vba.dpte_row_bw[k]); + } + locals->BandwidthWithoutPrefetchSupported[i][0] = true; + if (mode_lib->vba.MaximumReadBandwidthWithoutPrefetch > locals->ReturnBWPerState[i][0]) { + locals->BandwidthWithoutPrefetchSupported[i][0] = false; + } + + locals->PrefetchSupported[i][j] = true; + if (mode_lib->vba.MaximumReadBandwidthWithPrefetch > locals->ReturnBWPerState[i][0]) { + locals->PrefetchSupported[i][j] = false; + } + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (locals->LineTimesForPrefetch[k] < 2.0 + || locals->LinesForMetaPTE[k] >= 8.0 + || locals->LinesForMetaAndDPTERow[k] >= 16.0 + || mode_lib->vba.IsErrorResult[i][j][k] == true) { + locals->PrefetchSupported[i][j] = false; + } + } + locals->VRatioInPrefetchSupported[i][j] = true; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (locals->VRatioPreY[i][j][k] > 4.0 + || locals->VRatioPreC[i][j][k] > 4.0 + || mode_lib->vba.IsErrorResult[i][j][k] == true) { + locals->VRatioInPrefetchSupported[i][j] = false; + } + } + } while ((locals->PrefetchSupported[i][j] != true || locals->VRatioInPrefetchSupported[i][j] != true) + && mode_lib->vba.NextPrefetchMode < mode_lib->vba.MaxPrefetchMode); + + if (mode_lib->vba.PrefetchSupported[i][j] == true + && mode_lib->vba.VRatioInPrefetchSupported[i][j] == true) { + mode_lib->vba.BandwidthAvailableForImmediateFlip = + mode_lib->vba.ReturnBWPerState[i][0]; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + mode_lib->vba.BandwidthAvailableForImmediateFlip = + mode_lib->vba.BandwidthAvailableForImmediateFlip + - mode_lib->vba.cursor_bw[k] + - dml_max( + mode_lib->vba.ReadBandwidth[k] + mode_lib->vba.qual_row_bw[k], + mode_lib->vba.PrefetchBW[k]); + } + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + mode_lib->vba.ImmediateFlipBytes[k] = 0.0; + if ((mode_lib->vba.SourcePixelFormat[k] != dm_420_8 + && mode_lib->vba.SourcePixelFormat[k] != dm_420_10)) { + mode_lib->vba.ImmediateFlipBytes[k] = + mode_lib->vba.PDEAndMetaPTEBytesPerFrame[0][0][k] + + mode_lib->vba.MetaRowBytes[0][0][k] + + mode_lib->vba.DPTEBytesPerRow[0][0][k]; + } + } + mode_lib->vba.TotImmediateFlipBytes = 0.0; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if ((mode_lib->vba.SourcePixelFormat[k] != dm_420_8 + && mode_lib->vba.SourcePixelFormat[k] != dm_420_10)) { + mode_lib->vba.TotImmediateFlipBytes = + mode_lib->vba.TotImmediateFlipBytes + + mode_lib->vba.ImmediateFlipBytes[k]; + } + } + + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + CalculateFlipSchedule( + mode_lib, + mode_lib->vba.ExtraLatency, + mode_lib->vba.UrgentLatencyPixelDataOnly, + mode_lib->vba.GPUVMMaxPageTableLevels, + mode_lib->vba.GPUVMEnable, + mode_lib->vba.BandwidthAvailableForImmediateFlip, + mode_lib->vba.TotImmediateFlipBytes, + mode_lib->vba.SourcePixelFormat[k], + mode_lib->vba.ImmediateFlipBytes[k], + mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k], + mode_lib->vba.VRatio[k], + mode_lib->vba.Tno_bw[k], + mode_lib->vba.PDEAndMetaPTEBytesPerFrame[0][0][k], + mode_lib->vba.MetaRowBytes[0][0][k], + mode_lib->vba.DPTEBytesPerRow[0][0][k], + mode_lib->vba.DCCEnable[k], + mode_lib->vba.dpte_row_height[k], + mode_lib->vba.meta_row_height[k], + mode_lib->vba.qual_row_bw[k], + &mode_lib->vba.DestinationLinesToRequestVMInImmediateFlip[k], + &mode_lib->vba.DestinationLinesToRequestRowInImmediateFlip[k], + &mode_lib->vba.final_flip_bw[k], + &mode_lib->vba.ImmediateFlipSupportedForPipe[k]); + } + mode_lib->vba.total_dcn_read_bw_with_flip = 0.0; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + mode_lib->vba.total_dcn_read_bw_with_flip = + mode_lib->vba.total_dcn_read_bw_with_flip + + mode_lib->vba.cursor_bw[k] + + dml_max3( + mode_lib->vba.prefetch_vm_bw[k], + mode_lib->vba.prefetch_row_bw[k], + mode_lib->vba.final_flip_bw[k] + + dml_max( + mode_lib->vba.ReadBandwidth[k], + mode_lib->vba.RequiredPrefetchPixelDataBWLuma[i][j][k])); + } + mode_lib->vba.ImmediateFlipSupportedForState[i][j] = true; + if (mode_lib->vba.total_dcn_read_bw_with_flip + > mode_lib->vba.ReturnBWPerState[i][0]) { + mode_lib->vba.ImmediateFlipSupportedForState[i][j] = false; + } + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.ImmediateFlipSupportedForPipe[k] == false) { + mode_lib->vba.ImmediateFlipSupportedForState[i][j] = false; + } + } + } else { + mode_lib->vba.ImmediateFlipSupportedForState[i][j] = false; + } + } + } + + /*Vertical Active BW support*/ + mode_lib->vba.MaxTotalVActiveRDBandwidth = 0; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; k++) + mode_lib->vba.MaxTotalVActiveRDBandwidth = mode_lib->vba.MaxTotalVActiveRDBandwidth + mode_lib->vba.ReadBandwidth[k]; + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { + mode_lib->vba.MaxTotalVerticalActiveAvailableBandwidth[i][0] = dml_min(mode_lib->vba.ReturnBusWidth * + mode_lib->vba.DCFCLKPerState[i], mode_lib->vba.FabricAndDRAMBandwidthPerState[i] * 1000) * + mode_lib->vba.MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation / 100; + if (mode_lib->vba.MaxTotalVActiveRDBandwidth <= mode_lib->vba.MaxTotalVerticalActiveAvailableBandwidth[i][0]) + mode_lib->vba.TotalVerticalActiveBandwidthSupport[i][0] = true; + else + mode_lib->vba.TotalVerticalActiveBandwidthSupport[i][0] = false; + } + + /*PTE Buffer Size Check*/ + + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { + for (j = 0; j < 2; j++) { + locals->PTEBufferSizeNotExceeded[i][j] = true; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (locals->PTEBufferSizeNotExceededY[i][j][k] == false + || locals->PTEBufferSizeNotExceededC[i][j][k] == false) { + locals->PTEBufferSizeNotExceeded[i][j] = false; + } + } + } + } + /*Cursor Support Check*/ + mode_lib->vba.CursorSupport = true; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + for (j = 0; j < 2; j++) { + if (mode_lib->vba.CursorWidth[k][j] > 0.0) { + if (dml_floor( + dml_floor( + mode_lib->vba.CursorBufferSize + - mode_lib->vba.CursorChunkSize, + mode_lib->vba.CursorChunkSize) * 1024.0 + / (mode_lib->vba.CursorWidth[k][j] + * mode_lib->vba.CursorBPP[k][j] + / 8.0), + 1.0) + * (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) + / mode_lib->vba.VRatio[k] < mode_lib->vba.UrgentLatencyPixelDataOnly + || (mode_lib->vba.CursorBPP[k][j] == 64.0 + && mode_lib->vba.Cursor64BppSupport == false)) { + mode_lib->vba.CursorSupport = false; + } + } + } + } + /*Valid Pitch Check*/ + + mode_lib->vba.PitchSupport = true; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + locals->AlignedYPitch[k] = dml_ceil( + dml_max(mode_lib->vba.PitchY[k], mode_lib->vba.ViewportWidth[k]), + locals->MacroTileWidthY[k]); + if (locals->AlignedYPitch[k] > mode_lib->vba.PitchY[k]) { + mode_lib->vba.PitchSupport = false; + } + if (mode_lib->vba.DCCEnable[k] == true) { + locals->AlignedDCCMetaPitch[k] = dml_ceil( + dml_max( + mode_lib->vba.DCCMetaPitchY[k], + mode_lib->vba.ViewportWidth[k]), + 64.0 * locals->Read256BlockWidthY[k]); + } else { + locals->AlignedDCCMetaPitch[k] = mode_lib->vba.DCCMetaPitchY[k]; + } + if (locals->AlignedDCCMetaPitch[k] > mode_lib->vba.DCCMetaPitchY[k]) { + mode_lib->vba.PitchSupport = false; + } + if (mode_lib->vba.SourcePixelFormat[k] != dm_444_64 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_32 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_16 + && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16 + && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8) { + locals->AlignedCPitch[k] = dml_ceil( + dml_max( + mode_lib->vba.PitchC[k], + mode_lib->vba.ViewportWidth[k] / 2.0), + locals->MacroTileWidthC[k]); + } else { + locals->AlignedCPitch[k] = mode_lib->vba.PitchC[k]; + } + if (locals->AlignedCPitch[k] > mode_lib->vba.PitchC[k]) { + mode_lib->vba.PitchSupport = false; + } + } + /*Mode Support, Voltage State and SOC Configuration*/ + + for (i = mode_lib->vba.soc.num_states; i >= 0; i--) { + for (j = 0; j < 2; j++) { + enum dm_validation_status status = DML_VALIDATION_OK; + + if (mode_lib->vba.ScaleRatioAndTapsSupport != true) { + status = DML_FAIL_SCALE_RATIO_TAP; + } else if (mode_lib->vba.SourceFormatPixelAndScanSupport != true) { + status = DML_FAIL_SOURCE_PIXEL_FORMAT; + } else if (locals->ViewportSizeSupport[i][0] != true) { + status = DML_FAIL_VIEWPORT_SIZE; + } else if (locals->DIOSupport[i] != true) { + status = DML_FAIL_DIO_SUPPORT; + } else if (locals->NotEnoughDSCUnits[i] != false) { + status = DML_FAIL_NOT_ENOUGH_DSC; + } else if (locals->DSCCLKRequiredMoreThanSupported[i] != false) { + status = DML_FAIL_DSC_CLK_REQUIRED; + } else if (locals->UrgentLatencySupport[i][j] != true) { + status = DML_FAIL_URGENT_LATENCY; + } else if (locals->ROBSupport[i][0] != true) { + status = DML_FAIL_REORDERING_BUFFER; + } else if (locals->DISPCLK_DPPCLK_Support[i][j] != true) { + status = DML_FAIL_DISPCLK_DPPCLK; + } else if (locals->TotalAvailablePipesSupport[i][j] != true) { + status = DML_FAIL_TOTAL_AVAILABLE_PIPES; + } else if (mode_lib->vba.NumberOfOTGSupport != true) { + status = DML_FAIL_NUM_OTG; + } else if (mode_lib->vba.WritebackModeSupport != true) { + status = DML_FAIL_WRITEBACK_MODE; + } else if (mode_lib->vba.WritebackLatencySupport != true) { + status = DML_FAIL_WRITEBACK_LATENCY; + } else if (mode_lib->vba.WritebackScaleRatioAndTapsSupport != true) { + status = DML_FAIL_WRITEBACK_SCALE_RATIO_TAP; + } else if (mode_lib->vba.CursorSupport != true) { + status = DML_FAIL_CURSOR_SUPPORT; + } else if (mode_lib->vba.PitchSupport != true) { + status = DML_FAIL_PITCH_SUPPORT; + } else if (locals->PrefetchSupported[i][j] != true) { + status = DML_FAIL_PREFETCH_SUPPORT; + } else if (locals->TotalVerticalActiveBandwidthSupport[i][0] != true) { + status = DML_FAIL_TOTAL_V_ACTIVE_BW; + } else if (locals->VRatioInPrefetchSupported[i][j] != true) { + status = DML_FAIL_V_RATIO_PREFETCH; + } else if (locals->PTEBufferSizeNotExceeded[i][j] != true) { + status = DML_FAIL_PTE_BUFFER_SIZE; + } else if (mode_lib->vba.NonsupportedDSCInputBPC != false) { + status = DML_FAIL_DSC_INPUT_BPC; + } + + if (status == DML_VALIDATION_OK) { + locals->ModeSupport[i][j] = true; + } else { + locals->ModeSupport[i][j] = false; + } + locals->ValidationStatus[i] = status; + } + } + { + unsigned int MaximumMPCCombine = 0; + mode_lib->vba.VoltageLevel = mode_lib->vba.soc.num_states + 1; + for (i = mode_lib->vba.VoltageOverrideLevel; i <= mode_lib->vba.soc.num_states; i++) { + if (locals->ModeSupport[i][0] == true || locals->ModeSupport[i][1] == true) { + mode_lib->vba.VoltageLevel = i; + if (locals->ModeSupport[i][1] == true && (locals->ModeSupport[i][0] == false + || mode_lib->vba.WhenToDoMPCCombine == dm_mpc_always_when_possible)) { + MaximumMPCCombine = 1; + } else { + MaximumMPCCombine = 0; + } + break; + } + } + mode_lib->vba.ImmediateFlipSupport = + locals->ImmediateFlipSupportedForState[mode_lib->vba.VoltageLevel][MaximumMPCCombine]; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + mode_lib->vba.DPPPerPlane[k] = locals->NoOfDPP[mode_lib->vba.VoltageLevel][MaximumMPCCombine][k]; + locals->DPPCLK[k] = locals->RequiredDPPCLK[mode_lib->vba.VoltageLevel][MaximumMPCCombine][k]; + } + mode_lib->vba.DISPCLK = locals->RequiredDISPCLK[mode_lib->vba.VoltageLevel][MaximumMPCCombine]; + mode_lib->vba.maxMpcComb = MaximumMPCCombine; + } + mode_lib->vba.DCFCLK = mode_lib->vba.DCFCLKPerState[mode_lib->vba.VoltageLevel]; + mode_lib->vba.DRAMSpeed = mode_lib->vba.DRAMSpeedPerState[mode_lib->vba.VoltageLevel]; + mode_lib->vba.FabricClock = mode_lib->vba.FabricClockPerState[mode_lib->vba.VoltageLevel]; + mode_lib->vba.SOCCLK = mode_lib->vba.SOCCLKPerState[mode_lib->vba.VoltageLevel]; + mode_lib->vba.ReturnBW = locals->ReturnBWPerState[mode_lib->vba.VoltageLevel][0]; + mode_lib->vba.FabricAndDRAMBandwidth = locals->FabricAndDRAMBandwidthPerState[mode_lib->vba.VoltageLevel]; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.BlendingAndTiming[k] == k) { + mode_lib->vba.ODMCombineEnabled[k] = + locals->ODMCombineEnablePerState[mode_lib->vba.VoltageLevel][k]; + } else { + mode_lib->vba.ODMCombineEnabled[k] = 0; + } + mode_lib->vba.DSCEnabled[k] = + locals->RequiresDSC[mode_lib->vba.VoltageLevel][k]; + mode_lib->vba.OutputBpp[k] = + locals->OutputBppPerState[mode_lib->vba.VoltageLevel][k]; + } +} diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.h b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.h new file mode 100644 index 000000000..92b6805f4 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.h @@ -0,0 +1,32 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef _DCN20_DISPLAY_MODE_VBA_H_ +#define _DCN20_DISPLAY_MODE_VBA_H_ + +void dml20_recalculate(struct display_mode_lib *mode_lib); +void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c new file mode 100644 index 000000000..989d83ee3 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c @@ -0,0 +1,5234 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#include "../display_mode_lib.h" +#include "display_mode_vba_20v2.h" +#include "../dml_inline_defs.h" + +/* + * NOTE: + * This file is gcc-parseable HW gospel, coming straight from HW engineers. + * + * It doesn't adhere to Linux kernel style and sometimes will do things in odd + * ways. Unless there is something clearly wrong with it the code should + * remain as-is as it provides us with a guarantee from HW that it is correct. + */ + +#define BPP_INVALID 0 +#define BPP_BLENDED_PIPE 0xffffffff +#define DCN20_MAX_DSC_IMAGE_WIDTH 5184 +#define DCN20_MAX_420_IMAGE_WIDTH 4096 + +static double adjust_ReturnBW( + struct display_mode_lib *mode_lib, + double ReturnBW, + bool DCCEnabledAnyPlane, + double ReturnBandwidthToDCN); +static unsigned int dscceComputeDelay( + unsigned int bpc, + double bpp, + unsigned int sliceWidth, + unsigned int numSlices, + enum output_format_class pixelFormat); +static unsigned int dscComputeDelay(enum output_format_class pixelFormat); +static bool CalculateDelayAfterScaler( + struct display_mode_lib *mode_lib, + double ReturnBW, + double ReadBandwidthPlaneLuma, + double ReadBandwidthPlaneChroma, + double TotalDataReadBandwidth, + double DisplayPipeLineDeliveryTimeLuma, + double DisplayPipeLineDeliveryTimeChroma, + double DPPCLK, + double DISPCLK, + double PixelClock, + unsigned int DSCDelay, + unsigned int DPPPerPlane, + bool ScalerEnabled, + unsigned int NumberOfCursors, + double DPPCLKDelaySubtotal, + double DPPCLKDelaySCL, + double DPPCLKDelaySCLLBOnly, + double DPPCLKDelayCNVCFormater, + double DPPCLKDelayCNVCCursor, + double DISPCLKDelaySubtotal, + unsigned int ScalerRecoutWidth, + enum output_format_class OutputFormat, + unsigned int HTotal, + unsigned int SwathWidthSingleDPPY, + double BytePerPixelDETY, + double BytePerPixelDETC, + unsigned int SwathHeightY, + unsigned int SwathHeightC, + bool Interlace, + bool ProgressiveToInterlaceUnitInOPP, + double *DSTXAfterScaler, + double *DSTYAfterScaler + ); +// Super monster function with some 45 argument +static bool CalculatePrefetchSchedule( + struct display_mode_lib *mode_lib, + double DPPCLK, + double DISPCLK, + double PixelClock, + double DCFCLKDeepSleep, + unsigned int DPPPerPlane, + unsigned int NumberOfCursors, + unsigned int VBlank, + unsigned int HTotal, + unsigned int MaxInterDCNTileRepeaters, + unsigned int VStartup, + unsigned int PageTableLevels, + bool GPUVMEnable, + bool DynamicMetadataEnable, + unsigned int DynamicMetadataLinesBeforeActiveRequired, + unsigned int DynamicMetadataTransmittedBytes, + bool DCCEnable, + double UrgentLatencyPixelDataOnly, + double UrgentExtraLatency, + double TCalc, + unsigned int PDEAndMetaPTEBytesFrame, + unsigned int MetaRowByte, + unsigned int PixelPTEBytesPerRow, + double PrefetchSourceLinesY, + unsigned int SwathWidthY, + double BytePerPixelDETY, + double VInitPreFillY, + unsigned int MaxNumSwathY, + double PrefetchSourceLinesC, + double BytePerPixelDETC, + double VInitPreFillC, + unsigned int MaxNumSwathC, + unsigned int SwathHeightY, + unsigned int SwathHeightC, + double TWait, + bool XFCEnabled, + double XFCRemoteSurfaceFlipDelay, + bool InterlaceEnable, + bool ProgressiveToInterlaceUnitInOPP, + double DSTXAfterScaler, + double DSTYAfterScaler, + double *DestinationLinesForPrefetch, + double *PrefetchBandwidth, + double *DestinationLinesToRequestVMInVBlank, + double *DestinationLinesToRequestRowInVBlank, + double *VRatioPrefetchY, + double *VRatioPrefetchC, + double *RequiredPrefetchPixDataBW, + double *Tno_bw, + unsigned int *VUpdateOffsetPix, + double *VUpdateWidthPix, + double *VReadyOffsetPix); +static double RoundToDFSGranularityUp(double Clock, double VCOSpeed); +static double RoundToDFSGranularityDown(double Clock, double VCOSpeed); +static double CalculatePrefetchSourceLines( + struct display_mode_lib *mode_lib, + double VRatio, + double vtaps, + bool Interlace, + bool ProgressiveToInterlaceUnitInOPP, + unsigned int SwathHeight, + unsigned int ViewportYStart, + double *VInitPreFill, + unsigned int *MaxNumSwath); +static unsigned int CalculateVMAndRowBytes( + struct display_mode_lib *mode_lib, + bool DCCEnable, + unsigned int BlockHeight256Bytes, + unsigned int BlockWidth256Bytes, + enum source_format_class SourcePixelFormat, + unsigned int SurfaceTiling, + unsigned int BytePerPixel, + enum scan_direction_class ScanDirection, + unsigned int ViewportWidth, + unsigned int ViewportHeight, + unsigned int SwathWidthY, + bool GPUVMEnable, + unsigned int VMMPageSize, + unsigned int PTEBufferSizeInRequestsLuma, + unsigned int PDEProcessingBufIn64KBReqs, + unsigned int Pitch, + unsigned int DCCMetaPitch, + unsigned int *MacroTileWidth, + unsigned int *MetaRowByte, + unsigned int *PixelPTEBytesPerRow, + bool *PTEBufferSizeNotExceeded, + unsigned int *dpte_row_height, + unsigned int *meta_row_height); +static double CalculateTWait( + unsigned int PrefetchMode, + double DRAMClockChangeLatency, + double UrgentLatencyPixelDataOnly, + double SREnterPlusExitTime); +static double CalculateRemoteSurfaceFlipDelay( + struct display_mode_lib *mode_lib, + double VRatio, + double SwathWidth, + double Bpp, + double LineTime, + double XFCTSlvVupdateOffset, + double XFCTSlvVupdateWidth, + double XFCTSlvVreadyOffset, + double XFCXBUFLatencyTolerance, + double XFCFillBWOverhead, + double XFCSlvChunkSize, + double XFCBusTransportTime, + double TCalc, + double TWait, + double *SrcActiveDrainRate, + double *TInitXFill, + double *TslvChk); +static void CalculateActiveRowBandwidth( + bool GPUVMEnable, + enum source_format_class SourcePixelFormat, + double VRatio, + bool DCCEnable, + double LineTime, + unsigned int MetaRowByteLuma, + unsigned int MetaRowByteChroma, + unsigned int meta_row_height_luma, + unsigned int meta_row_height_chroma, + unsigned int PixelPTEBytesPerRowLuma, + unsigned int PixelPTEBytesPerRowChroma, + unsigned int dpte_row_height_luma, + unsigned int dpte_row_height_chroma, + double *meta_row_bw, + double *dpte_row_bw, + double *qual_row_bw); +static void CalculateFlipSchedule( + struct display_mode_lib *mode_lib, + double UrgentExtraLatency, + double UrgentLatencyPixelDataOnly, + unsigned int GPUVMMaxPageTableLevels, + bool GPUVMEnable, + double BandwidthAvailableForImmediateFlip, + unsigned int TotImmediateFlipBytes, + enum source_format_class SourcePixelFormat, + unsigned int ImmediateFlipBytes, + double LineTime, + double VRatio, + double Tno_bw, + double PDEAndMetaPTEBytesFrame, + unsigned int MetaRowByte, + unsigned int PixelPTEBytesPerRow, + bool DCCEnable, + unsigned int dpte_row_height, + unsigned int meta_row_height, + double qual_row_bw, + double *DestinationLinesToRequestVMInImmediateFlip, + double *DestinationLinesToRequestRowInImmediateFlip, + double *final_flip_bw, + bool *ImmediateFlipSupportedForPipe); +static double CalculateWriteBackDelay( + enum source_format_class WritebackPixelFormat, + double WritebackHRatio, + double WritebackVRatio, + unsigned int WritebackLumaHTaps, + unsigned int WritebackLumaVTaps, + unsigned int WritebackChromaHTaps, + unsigned int WritebackChromaVTaps, + unsigned int WritebackDestinationWidth); + +static void dml20v2_DisplayPipeConfiguration(struct display_mode_lib *mode_lib); +static void dml20v2_DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation( + struct display_mode_lib *mode_lib); + +void dml20v2_recalculate(struct display_mode_lib *mode_lib) +{ + ModeSupportAndSystemConfiguration(mode_lib); + mode_lib->vba.FabricAndDRAMBandwidth = dml_min( + mode_lib->vba.DRAMSpeed * mode_lib->vba.NumberOfChannels * mode_lib->vba.DRAMChannelWidth, + mode_lib->vba.FabricClock * mode_lib->vba.FabricDatapathToDCNDataReturn) / 1000.0; + PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib); + dml20v2_DisplayPipeConfiguration(mode_lib); + dml20v2_DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib); +} + +static double adjust_ReturnBW( + struct display_mode_lib *mode_lib, + double ReturnBW, + bool DCCEnabledAnyPlane, + double ReturnBandwidthToDCN) +{ + double CriticalCompression; + + if (DCCEnabledAnyPlane + && ReturnBandwidthToDCN + > mode_lib->vba.DCFCLK * mode_lib->vba.ReturnBusWidth / 4.0) + ReturnBW = + dml_min( + ReturnBW, + ReturnBandwidthToDCN * 4 + * (1.0 + - mode_lib->vba.UrgentLatencyPixelDataOnly + / ((mode_lib->vba.ROBBufferSizeInKByte + - mode_lib->vba.PixelChunkSizeInKByte) + * 1024 + / ReturnBandwidthToDCN + - mode_lib->vba.DCFCLK + * mode_lib->vba.ReturnBusWidth + / 4) + + mode_lib->vba.UrgentLatencyPixelDataOnly)); + + CriticalCompression = 2.0 * mode_lib->vba.ReturnBusWidth * mode_lib->vba.DCFCLK + * mode_lib->vba.UrgentLatencyPixelDataOnly + / (ReturnBandwidthToDCN * mode_lib->vba.UrgentLatencyPixelDataOnly + + (mode_lib->vba.ROBBufferSizeInKByte + - mode_lib->vba.PixelChunkSizeInKByte) + * 1024); + + if (DCCEnabledAnyPlane && CriticalCompression > 1.0 && CriticalCompression < 4.0) + ReturnBW = + dml_min( + ReturnBW, + 4.0 * ReturnBandwidthToDCN + * (mode_lib->vba.ROBBufferSizeInKByte + - mode_lib->vba.PixelChunkSizeInKByte) + * 1024 + * mode_lib->vba.ReturnBusWidth + * mode_lib->vba.DCFCLK + * mode_lib->vba.UrgentLatencyPixelDataOnly + / dml_pow( + (ReturnBandwidthToDCN + * mode_lib->vba.UrgentLatencyPixelDataOnly + + (mode_lib->vba.ROBBufferSizeInKByte + - mode_lib->vba.PixelChunkSizeInKByte) + * 1024), + 2)); + + return ReturnBW; +} + +static unsigned int dscceComputeDelay( + unsigned int bpc, + double bpp, + unsigned int sliceWidth, + unsigned int numSlices, + enum output_format_class pixelFormat) +{ + // valid bpc = source bits per component in the set of {8, 10, 12} + // valid bpp = increments of 1/16 of a bit + // min = 6/7/8 in N420/N422/444, respectively + // max = such that compression is 1:1 + //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode) + //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4} + //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420} + + // fixed value + unsigned int rcModelSize = 8192; + + // N422/N420 operate at 2 pixels per clock + unsigned int pixelsPerClock, lstall, D, initalXmitDelay, w, s, ix, wx, p, l0, a, ax, l, + Delay, pixels; + + if (pixelFormat == dm_n422 || pixelFormat == dm_420) + pixelsPerClock = 2; + // #all other modes operate at 1 pixel per clock + else + pixelsPerClock = 1; + + //initial transmit delay as per PPS + initalXmitDelay = dml_round(rcModelSize / 2.0 / bpp / pixelsPerClock); + + //compute ssm delay + if (bpc == 8) + D = 81; + else if (bpc == 10) + D = 89; + else + D = 113; + + //divide by pixel per cycle to compute slice width as seen by DSC + w = sliceWidth / pixelsPerClock; + + //422 mode has an additional cycle of delay + if (pixelFormat == dm_s422) + s = 1; + else + s = 0; + + //main calculation for the dscce + ix = initalXmitDelay + 45; + wx = (w + 2) / 3; + p = 3 * wx - w; + l0 = ix / w; + a = ix + p * l0; + ax = (a + 2) / 3 + D + 6 + 1; + l = (ax + wx - 1) / wx; + if ((ix % w) == 0 && p != 0) + lstall = 1; + else + lstall = 0; + Delay = l * wx * (numSlices - 1) + ax + s + lstall + 22; + + //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels + pixels = Delay * 3 * pixelsPerClock; + return pixels; +} + +static unsigned int dscComputeDelay(enum output_format_class pixelFormat) +{ + unsigned int Delay = 0; + + if (pixelFormat == dm_420) { + // sfr + Delay = Delay + 2; + // dsccif + Delay = Delay + 0; + // dscc - input deserializer + Delay = Delay + 3; + // dscc gets pixels every other cycle + Delay = Delay + 2; + // dscc - input cdc fifo + Delay = Delay + 12; + // dscc gets pixels every other cycle + Delay = Delay + 13; + // dscc - cdc uncertainty + Delay = Delay + 2; + // dscc - output cdc fifo + Delay = Delay + 7; + // dscc gets pixels every other cycle + Delay = Delay + 3; + // dscc - cdc uncertainty + Delay = Delay + 2; + // dscc - output serializer + Delay = Delay + 1; + // sft + Delay = Delay + 1; + } else if (pixelFormat == dm_n422) { + // sfr + Delay = Delay + 2; + // dsccif + Delay = Delay + 1; + // dscc - input deserializer + Delay = Delay + 5; + // dscc - input cdc fifo + Delay = Delay + 25; + // dscc - cdc uncertainty + Delay = Delay + 2; + // dscc - output cdc fifo + Delay = Delay + 10; + // dscc - cdc uncertainty + Delay = Delay + 2; + // dscc - output serializer + Delay = Delay + 1; + // sft + Delay = Delay + 1; + } else { + // sfr + Delay = Delay + 2; + // dsccif + Delay = Delay + 0; + // dscc - input deserializer + Delay = Delay + 3; + // dscc - input cdc fifo + Delay = Delay + 12; + // dscc - cdc uncertainty + Delay = Delay + 2; + // dscc - output cdc fifo + Delay = Delay + 7; + // dscc - output serializer + Delay = Delay + 1; + // dscc - cdc uncertainty + Delay = Delay + 2; + // sft + Delay = Delay + 1; + } + + return Delay; +} + +static bool CalculateDelayAfterScaler( + struct display_mode_lib *mode_lib, + double ReturnBW, + double ReadBandwidthPlaneLuma, + double ReadBandwidthPlaneChroma, + double TotalDataReadBandwidth, + double DisplayPipeLineDeliveryTimeLuma, + double DisplayPipeLineDeliveryTimeChroma, + double DPPCLK, + double DISPCLK, + double PixelClock, + unsigned int DSCDelay, + unsigned int DPPPerPlane, + bool ScalerEnabled, + unsigned int NumberOfCursors, + double DPPCLKDelaySubtotal, + double DPPCLKDelaySCL, + double DPPCLKDelaySCLLBOnly, + double DPPCLKDelayCNVCFormater, + double DPPCLKDelayCNVCCursor, + double DISPCLKDelaySubtotal, + unsigned int ScalerRecoutWidth, + enum output_format_class OutputFormat, + unsigned int HTotal, + unsigned int SwathWidthSingleDPPY, + double BytePerPixelDETY, + double BytePerPixelDETC, + unsigned int SwathHeightY, + unsigned int SwathHeightC, + bool Interlace, + bool ProgressiveToInterlaceUnitInOPP, + double *DSTXAfterScaler, + double *DSTYAfterScaler + ) +{ + unsigned int DPPCycles, DISPCLKCycles; + double DataFabricLineDeliveryTimeLuma; + double DataFabricLineDeliveryTimeChroma; + double DSTTotalPixelsAfterScaler; + + DataFabricLineDeliveryTimeLuma = SwathWidthSingleDPPY * SwathHeightY * dml_ceil(BytePerPixelDETY, 1) / (mode_lib->vba.ReturnBW * ReadBandwidthPlaneLuma / TotalDataReadBandwidth); + mode_lib->vba.LastPixelOfLineExtraWatermark = dml_max(mode_lib->vba.LastPixelOfLineExtraWatermark, DataFabricLineDeliveryTimeLuma - DisplayPipeLineDeliveryTimeLuma); + + if (BytePerPixelDETC != 0) { + DataFabricLineDeliveryTimeChroma = SwathWidthSingleDPPY / 2 * SwathHeightC * dml_ceil(BytePerPixelDETC, 2) / (mode_lib->vba.ReturnBW * ReadBandwidthPlaneChroma / TotalDataReadBandwidth); + mode_lib->vba.LastPixelOfLineExtraWatermark = dml_max(mode_lib->vba.LastPixelOfLineExtraWatermark, DataFabricLineDeliveryTimeChroma - DisplayPipeLineDeliveryTimeChroma); + } + + if (ScalerEnabled) + DPPCycles = DPPCLKDelaySubtotal + DPPCLKDelaySCL; + else + DPPCycles = DPPCLKDelaySubtotal + DPPCLKDelaySCLLBOnly; + + DPPCycles = DPPCycles + DPPCLKDelayCNVCFormater + NumberOfCursors * DPPCLKDelayCNVCCursor; + + DISPCLKCycles = DISPCLKDelaySubtotal; + + if (DPPCLK == 0.0 || DISPCLK == 0.0) + return true; + + *DSTXAfterScaler = DPPCycles * PixelClock / DPPCLK + DISPCLKCycles * PixelClock / DISPCLK + + DSCDelay; + + if (DPPPerPlane > 1) + *DSTXAfterScaler = *DSTXAfterScaler + ScalerRecoutWidth; + + if (OutputFormat == dm_420 || (Interlace && ProgressiveToInterlaceUnitInOPP)) + *DSTYAfterScaler = 1; + else + *DSTYAfterScaler = 0; + + DSTTotalPixelsAfterScaler = ((double) (*DSTYAfterScaler * HTotal)) + *DSTXAfterScaler; + *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / HTotal, 1); + *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * HTotal)); + + return true; +} + +static bool CalculatePrefetchSchedule( + struct display_mode_lib *mode_lib, + double DPPCLK, + double DISPCLK, + double PixelClock, + double DCFCLKDeepSleep, + unsigned int DPPPerPlane, + unsigned int NumberOfCursors, + unsigned int VBlank, + unsigned int HTotal, + unsigned int MaxInterDCNTileRepeaters, + unsigned int VStartup, + unsigned int PageTableLevels, + bool GPUVMEnable, + bool DynamicMetadataEnable, + unsigned int DynamicMetadataLinesBeforeActiveRequired, + unsigned int DynamicMetadataTransmittedBytes, + bool DCCEnable, + double UrgentLatencyPixelDataOnly, + double UrgentExtraLatency, + double TCalc, + unsigned int PDEAndMetaPTEBytesFrame, + unsigned int MetaRowByte, + unsigned int PixelPTEBytesPerRow, + double PrefetchSourceLinesY, + unsigned int SwathWidthY, + double BytePerPixelDETY, + double VInitPreFillY, + unsigned int MaxNumSwathY, + double PrefetchSourceLinesC, + double BytePerPixelDETC, + double VInitPreFillC, + unsigned int MaxNumSwathC, + unsigned int SwathHeightY, + unsigned int SwathHeightC, + double TWait, + bool XFCEnabled, + double XFCRemoteSurfaceFlipDelay, + bool InterlaceEnable, + bool ProgressiveToInterlaceUnitInOPP, + double DSTXAfterScaler, + double DSTYAfterScaler, + double *DestinationLinesForPrefetch, + double *PrefetchBandwidth, + double *DestinationLinesToRequestVMInVBlank, + double *DestinationLinesToRequestRowInVBlank, + double *VRatioPrefetchY, + double *VRatioPrefetchC, + double *RequiredPrefetchPixDataBW, + double *Tno_bw, + unsigned int *VUpdateOffsetPix, + double *VUpdateWidthPix, + double *VReadyOffsetPix) +{ + bool MyError = false; + double TotalRepeaterDelayTime; + double Tdm, LineTime, Tsetup; + double dst_y_prefetch_equ; + double Tsw_oto; + double prefetch_bw_oto; + double Tvm_oto; + double Tr0_oto; + double Tpre_oto; + double dst_y_prefetch_oto; + double TimeForFetchingMetaPTE = 0; + double TimeForFetchingRowInVBlank = 0; + double LinesToRequestPrefetchPixelData = 0; + + *VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1); + TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2.0 / DPPCLK + 3.0 / DISPCLK); + *VUpdateWidthPix = (14.0 / DCFCLKDeepSleep + 12.0 / DPPCLK + TotalRepeaterDelayTime) + * PixelClock; + + *VReadyOffsetPix = dml_max( + 150.0 / DPPCLK, + TotalRepeaterDelayTime + 20.0 / DCFCLKDeepSleep + 10.0 / DPPCLK) + * PixelClock; + + Tsetup = (double) (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock; + + LineTime = (double) HTotal / PixelClock; + + if (DynamicMetadataEnable) { + double Tdmbf, Tdmec, Tdmsks; + + Tdm = dml_max(0.0, UrgentExtraLatency - TCalc); + Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK; + Tdmec = LineTime; + if (DynamicMetadataLinesBeforeActiveRequired == 0) + Tdmsks = VBlank * LineTime / 2.0; + else + Tdmsks = DynamicMetadataLinesBeforeActiveRequired * LineTime; + if (InterlaceEnable && !ProgressiveToInterlaceUnitInOPP) + Tdmsks = Tdmsks / 2; + if (VStartup * LineTime + < Tsetup + TWait + UrgentExtraLatency + Tdmbf + Tdmec + Tdmsks) { + MyError = true; + } + } else + Tdm = 0; + + if (GPUVMEnable) { + if (PageTableLevels == 4) + *Tno_bw = UrgentExtraLatency + UrgentLatencyPixelDataOnly; + else if (PageTableLevels == 3) + *Tno_bw = UrgentExtraLatency; + else + *Tno_bw = 0; + } else if (DCCEnable) + *Tno_bw = LineTime; + else + *Tno_bw = LineTime / 4; + + dst_y_prefetch_equ = VStartup - dml_max(TCalc + TWait, XFCRemoteSurfaceFlipDelay) / LineTime + - (Tsetup + Tdm) / LineTime + - (DSTYAfterScaler + DSTXAfterScaler / HTotal); + + Tsw_oto = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime; + + prefetch_bw_oto = (MetaRowByte + PixelPTEBytesPerRow + + PrefetchSourceLinesY * SwathWidthY * dml_ceil(BytePerPixelDETY, 1) + + PrefetchSourceLinesC * SwathWidthY / 2 * dml_ceil(BytePerPixelDETC, 2)) + / Tsw_oto; + + if (GPUVMEnable == true) { + Tvm_oto = + dml_max( + *Tno_bw + PDEAndMetaPTEBytesFrame / prefetch_bw_oto, + dml_max( + UrgentExtraLatency + + UrgentLatencyPixelDataOnly + * (PageTableLevels + - 1), + LineTime / 4.0)); + } else + Tvm_oto = LineTime / 4.0; + + if ((GPUVMEnable == true || DCCEnable == true)) { + Tr0_oto = dml_max( + (MetaRowByte + PixelPTEBytesPerRow) / prefetch_bw_oto, + dml_max(UrgentLatencyPixelDataOnly, dml_max(LineTime - Tvm_oto, LineTime / 4))); + } else + Tr0_oto = LineTime - Tvm_oto; + + Tpre_oto = Tvm_oto + Tr0_oto + Tsw_oto; + + dst_y_prefetch_oto = Tpre_oto / LineTime; + + if (dst_y_prefetch_oto < dst_y_prefetch_equ) + *DestinationLinesForPrefetch = dst_y_prefetch_oto; + else + *DestinationLinesForPrefetch = dst_y_prefetch_equ; + + *DestinationLinesForPrefetch = dml_floor(4.0 * (*DestinationLinesForPrefetch + 0.125), 1) + / 4; + + dml_print("DML: VStartup: %d\n", VStartup); + dml_print("DML: TCalc: %f\n", TCalc); + dml_print("DML: TWait: %f\n", TWait); + dml_print("DML: XFCRemoteSurfaceFlipDelay: %f\n", XFCRemoteSurfaceFlipDelay); + dml_print("DML: LineTime: %f\n", LineTime); + dml_print("DML: Tsetup: %f\n", Tsetup); + dml_print("DML: Tdm: %f\n", Tdm); + dml_print("DML: DSTYAfterScaler: %f\n", DSTYAfterScaler); + dml_print("DML: DSTXAfterScaler: %f\n", DSTXAfterScaler); + dml_print("DML: HTotal: %d\n", HTotal); + + *PrefetchBandwidth = 0; + *DestinationLinesToRequestVMInVBlank = 0; + *DestinationLinesToRequestRowInVBlank = 0; + *VRatioPrefetchY = 0; + *VRatioPrefetchC = 0; + *RequiredPrefetchPixDataBW = 0; + if (*DestinationLinesForPrefetch > 1) { + *PrefetchBandwidth = (PDEAndMetaPTEBytesFrame + 2 * MetaRowByte + + 2 * PixelPTEBytesPerRow + + PrefetchSourceLinesY * SwathWidthY * dml_ceil(BytePerPixelDETY, 1) + + PrefetchSourceLinesC * SwathWidthY / 2 + * dml_ceil(BytePerPixelDETC, 2)) + / (*DestinationLinesForPrefetch * LineTime - *Tno_bw); + if (GPUVMEnable) { + TimeForFetchingMetaPTE = + dml_max( + *Tno_bw + + (double) PDEAndMetaPTEBytesFrame + / *PrefetchBandwidth, + dml_max( + UrgentExtraLatency + + UrgentLatencyPixelDataOnly + * (PageTableLevels + - 1), + LineTime / 4)); + } else { + if (NumberOfCursors > 0 || XFCEnabled) + TimeForFetchingMetaPTE = LineTime / 4; + else + TimeForFetchingMetaPTE = 0.0; + } + + if ((GPUVMEnable == true || DCCEnable == true)) { + TimeForFetchingRowInVBlank = + dml_max( + (MetaRowByte + PixelPTEBytesPerRow) + / *PrefetchBandwidth, + dml_max( + UrgentLatencyPixelDataOnly, + dml_max( + LineTime + - TimeForFetchingMetaPTE, + LineTime + / 4.0))); + } else { + if (NumberOfCursors > 0 || XFCEnabled) + TimeForFetchingRowInVBlank = LineTime - TimeForFetchingMetaPTE; + else + TimeForFetchingRowInVBlank = 0.0; + } + + *DestinationLinesToRequestVMInVBlank = dml_floor( + 4.0 * (TimeForFetchingMetaPTE / LineTime + 0.125), + 1) / 4.0; + + *DestinationLinesToRequestRowInVBlank = dml_floor( + 4.0 * (TimeForFetchingRowInVBlank / LineTime + 0.125), + 1) / 4.0; + + LinesToRequestPrefetchPixelData = + *DestinationLinesForPrefetch + - ((NumberOfCursors > 0 || GPUVMEnable + || DCCEnable) ? + (*DestinationLinesToRequestVMInVBlank + + *DestinationLinesToRequestRowInVBlank) : + 0.0); + + if (LinesToRequestPrefetchPixelData > 0) { + + *VRatioPrefetchY = (double) PrefetchSourceLinesY + / LinesToRequestPrefetchPixelData; + *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); + if ((SwathHeightY > 4) && (VInitPreFillY > 3)) { + if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) { + *VRatioPrefetchY = + dml_max( + (double) PrefetchSourceLinesY + / LinesToRequestPrefetchPixelData, + (double) MaxNumSwathY + * SwathHeightY + / (LinesToRequestPrefetchPixelData + - (VInitPreFillY + - 3.0) + / 2.0)); + *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); + } else { + MyError = true; + *VRatioPrefetchY = 0; + } + } + + *VRatioPrefetchC = (double) PrefetchSourceLinesC + / LinesToRequestPrefetchPixelData; + *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); + + if ((SwathHeightC > 4)) { + if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) { + *VRatioPrefetchC = + dml_max( + *VRatioPrefetchC, + (double) MaxNumSwathC + * SwathHeightC + / (LinesToRequestPrefetchPixelData + - (VInitPreFillC + - 3.0) + / 2.0)); + *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); + } else { + MyError = true; + *VRatioPrefetchC = 0; + } + } + + *RequiredPrefetchPixDataBW = + DPPPerPlane + * ((double) PrefetchSourceLinesY + / LinesToRequestPrefetchPixelData + * dml_ceil( + BytePerPixelDETY, + 1) + + (double) PrefetchSourceLinesC + / LinesToRequestPrefetchPixelData + * dml_ceil( + BytePerPixelDETC, + 2) + / 2) + * SwathWidthY / LineTime; + } else { + MyError = true; + *VRatioPrefetchY = 0; + *VRatioPrefetchC = 0; + *RequiredPrefetchPixDataBW = 0; + } + + } else { + MyError = true; + } + + if (MyError) { + *PrefetchBandwidth = 0; + TimeForFetchingMetaPTE = 0; + TimeForFetchingRowInVBlank = 0; + *DestinationLinesToRequestVMInVBlank = 0; + *DestinationLinesToRequestRowInVBlank = 0; + *DestinationLinesForPrefetch = 0; + LinesToRequestPrefetchPixelData = 0; + *VRatioPrefetchY = 0; + *VRatioPrefetchC = 0; + *RequiredPrefetchPixDataBW = 0; + } + + return MyError; +} + +static double RoundToDFSGranularityUp(double Clock, double VCOSpeed) +{ + return VCOSpeed * 4 / dml_floor(VCOSpeed * 4 / Clock, 1); +} + +static double RoundToDFSGranularityDown(double Clock, double VCOSpeed) +{ + return VCOSpeed * 4 / dml_ceil(VCOSpeed * 4 / Clock, 1); +} + +static double CalculatePrefetchSourceLines( + struct display_mode_lib *mode_lib, + double VRatio, + double vtaps, + bool Interlace, + bool ProgressiveToInterlaceUnitInOPP, + unsigned int SwathHeight, + unsigned int ViewportYStart, + double *VInitPreFill, + unsigned int *MaxNumSwath) +{ + unsigned int MaxPartialSwath; + + if (ProgressiveToInterlaceUnitInOPP) + *VInitPreFill = dml_floor((VRatio + vtaps + 1) / 2.0, 1); + else + *VInitPreFill = dml_floor((VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1); + + if (!mode_lib->vba.IgnoreViewportPositioning) { + + *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1.0; + + if (*VInitPreFill > 1.0) + MaxPartialSwath = (unsigned int) (*VInitPreFill - 2) % SwathHeight; + else + MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 2) + % SwathHeight; + MaxPartialSwath = dml_max(1U, MaxPartialSwath); + + } else { + + if (ViewportYStart != 0) + dml_print( + "WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n"); + + *MaxNumSwath = dml_ceil(*VInitPreFill / SwathHeight, 1); + + if (*VInitPreFill > 1.0) + MaxPartialSwath = (unsigned int) (*VInitPreFill - 1) % SwathHeight; + else + MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 1) + % SwathHeight; + } + + return *MaxNumSwath * SwathHeight + MaxPartialSwath; +} + +static unsigned int CalculateVMAndRowBytes( + struct display_mode_lib *mode_lib, + bool DCCEnable, + unsigned int BlockHeight256Bytes, + unsigned int BlockWidth256Bytes, + enum source_format_class SourcePixelFormat, + unsigned int SurfaceTiling, + unsigned int BytePerPixel, + enum scan_direction_class ScanDirection, + unsigned int ViewportWidth, + unsigned int ViewportHeight, + unsigned int SwathWidth, + bool GPUVMEnable, + unsigned int VMMPageSize, + unsigned int PTEBufferSizeInRequestsLuma, + unsigned int PDEProcessingBufIn64KBReqs, + unsigned int Pitch, + unsigned int DCCMetaPitch, + unsigned int *MacroTileWidth, + unsigned int *MetaRowByte, + unsigned int *PixelPTEBytesPerRow, + bool *PTEBufferSizeNotExceeded, + unsigned int *dpte_row_height, + unsigned int *meta_row_height) +{ + unsigned int MetaRequestHeight; + unsigned int MetaRequestWidth; + unsigned int MetaSurfWidth; + unsigned int MetaSurfHeight; + unsigned int MPDEBytesFrame; + unsigned int MetaPTEBytesFrame; + unsigned int DCCMetaSurfaceBytes; + + unsigned int MacroTileSizeBytes; + unsigned int MacroTileHeight; + unsigned int DPDE0BytesFrame; + unsigned int ExtraDPDEBytesFrame; + unsigned int PDEAndMetaPTEBytesFrame; + + if (DCCEnable == true) { + MetaRequestHeight = 8 * BlockHeight256Bytes; + MetaRequestWidth = 8 * BlockWidth256Bytes; + if (ScanDirection == dm_horz) { + *meta_row_height = MetaRequestHeight; + MetaSurfWidth = dml_ceil((double) SwathWidth - 1, MetaRequestWidth) + + MetaRequestWidth; + *MetaRowByte = MetaSurfWidth * MetaRequestHeight * BytePerPixel / 256.0; + } else { + *meta_row_height = MetaRequestWidth; + MetaSurfHeight = dml_ceil((double) SwathWidth - 1, MetaRequestHeight) + + MetaRequestHeight; + *MetaRowByte = MetaSurfHeight * MetaRequestWidth * BytePerPixel / 256.0; + } + if (ScanDirection == dm_horz) { + DCCMetaSurfaceBytes = DCCMetaPitch + * (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + + 64 * BlockHeight256Bytes) * BytePerPixel + / 256; + } else { + DCCMetaSurfaceBytes = DCCMetaPitch + * (dml_ceil( + (double) ViewportHeight - 1, + 64 * BlockHeight256Bytes) + + 64 * BlockHeight256Bytes) * BytePerPixel + / 256; + } + if (GPUVMEnable == true) { + MetaPTEBytesFrame = (dml_ceil( + (double) (DCCMetaSurfaceBytes - VMMPageSize) + / (8 * VMMPageSize), + 1) + 1) * 64; + MPDEBytesFrame = 128 * (mode_lib->vba.GPUVMMaxPageTableLevels - 1); + } else { + MetaPTEBytesFrame = 0; + MPDEBytesFrame = 0; + } + } else { + MetaPTEBytesFrame = 0; + MPDEBytesFrame = 0; + *MetaRowByte = 0; + } + + if (SurfaceTiling == dm_sw_linear || SurfaceTiling == dm_sw_gfx7_2d_thin_gl || SurfaceTiling == dm_sw_gfx7_2d_thin_l_vp) { + MacroTileSizeBytes = 256; + MacroTileHeight = BlockHeight256Bytes; + } else if (SurfaceTiling == dm_sw_4kb_s || SurfaceTiling == dm_sw_4kb_s_x + || SurfaceTiling == dm_sw_4kb_d || SurfaceTiling == dm_sw_4kb_d_x) { + MacroTileSizeBytes = 4096; + MacroTileHeight = 4 * BlockHeight256Bytes; + } else if (SurfaceTiling == dm_sw_64kb_s || SurfaceTiling == dm_sw_64kb_s_t + || SurfaceTiling == dm_sw_64kb_s_x || SurfaceTiling == dm_sw_64kb_d + || SurfaceTiling == dm_sw_64kb_d_t || SurfaceTiling == dm_sw_64kb_d_x + || SurfaceTiling == dm_sw_64kb_r_x) { + MacroTileSizeBytes = 65536; + MacroTileHeight = 16 * BlockHeight256Bytes; + } else { + MacroTileSizeBytes = 262144; + MacroTileHeight = 32 * BlockHeight256Bytes; + } + *MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight; + + if (GPUVMEnable == true && mode_lib->vba.GPUVMMaxPageTableLevels > 1) { + if (ScanDirection == dm_horz) { + DPDE0BytesFrame = + 64 + * (dml_ceil( + ((Pitch + * (dml_ceil( + ViewportHeight + - 1, + MacroTileHeight) + + MacroTileHeight) + * BytePerPixel) + - MacroTileSizeBytes) + / (8 + * 2097152), + 1) + 1); + } else { + DPDE0BytesFrame = + 64 + * (dml_ceil( + ((Pitch + * (dml_ceil( + (double) SwathWidth + - 1, + MacroTileHeight) + + MacroTileHeight) + * BytePerPixel) + - MacroTileSizeBytes) + / (8 + * 2097152), + 1) + 1); + } + ExtraDPDEBytesFrame = 128 * (mode_lib->vba.GPUVMMaxPageTableLevels - 2); + } else { + DPDE0BytesFrame = 0; + ExtraDPDEBytesFrame = 0; + } + + PDEAndMetaPTEBytesFrame = MetaPTEBytesFrame + MPDEBytesFrame + DPDE0BytesFrame + + ExtraDPDEBytesFrame; + + if (GPUVMEnable == true) { + unsigned int PTERequestSize; + unsigned int PixelPTEReqHeight; + unsigned int PixelPTEReqWidth; + double FractionOfPTEReturnDrop; + unsigned int EffectivePDEProcessingBufIn64KBReqs; + + if (SurfaceTiling == dm_sw_linear) { + PixelPTEReqHeight = 1; + PixelPTEReqWidth = 8.0 * VMMPageSize / BytePerPixel; + PTERequestSize = 64; + FractionOfPTEReturnDrop = 0; + } else if (MacroTileSizeBytes == 4096) { + PixelPTEReqHeight = MacroTileHeight; + PixelPTEReqWidth = 8 * *MacroTileWidth; + PTERequestSize = 64; + if (ScanDirection == dm_horz) + FractionOfPTEReturnDrop = 0; + else + FractionOfPTEReturnDrop = 7 / 8; + } else if (VMMPageSize == 4096 && MacroTileSizeBytes > 4096) { + PixelPTEReqHeight = 16 * BlockHeight256Bytes; + PixelPTEReqWidth = 16 * BlockWidth256Bytes; + PTERequestSize = 128; + FractionOfPTEReturnDrop = 0; + } else { + PixelPTEReqHeight = MacroTileHeight; + PixelPTEReqWidth = 8 * *MacroTileWidth; + PTERequestSize = 64; + FractionOfPTEReturnDrop = 0; + } + + if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10) + EffectivePDEProcessingBufIn64KBReqs = PDEProcessingBufIn64KBReqs / 2; + else + EffectivePDEProcessingBufIn64KBReqs = PDEProcessingBufIn64KBReqs; + + if (SurfaceTiling == dm_sw_linear) { + *dpte_row_height = + dml_min( + 128, + 1 + << (unsigned int) dml_floor( + dml_log2( + dml_min( + (double) PTEBufferSizeInRequestsLuma + * PixelPTEReqWidth, + EffectivePDEProcessingBufIn64KBReqs + * 65536.0 + / BytePerPixel) + / Pitch), + 1)); + *PixelPTEBytesPerRow = PTERequestSize + * (dml_ceil( + (double) (Pitch * *dpte_row_height - 1) + / PixelPTEReqWidth, + 1) + 1); + } else if (ScanDirection == dm_horz) { + *dpte_row_height = PixelPTEReqHeight; + *PixelPTEBytesPerRow = PTERequestSize + * (dml_ceil(((double) SwathWidth - 1) / PixelPTEReqWidth, 1) + + 1); + } else { + *dpte_row_height = dml_min(PixelPTEReqWidth, *MacroTileWidth); + *PixelPTEBytesPerRow = PTERequestSize + * (dml_ceil( + ((double) SwathWidth - 1) + / PixelPTEReqHeight, + 1) + 1); + } + if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop) + <= 64 * PTEBufferSizeInRequestsLuma) { + *PTEBufferSizeNotExceeded = true; + } else { + *PTEBufferSizeNotExceeded = false; + } + } else { + *PixelPTEBytesPerRow = 0; + *PTEBufferSizeNotExceeded = true; + } + + return PDEAndMetaPTEBytesFrame; +} + +static void dml20v2_DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation( + struct display_mode_lib *mode_lib) +{ + unsigned int j, k; + + mode_lib->vba.WritebackDISPCLK = 0.0; + mode_lib->vba.DISPCLKWithRamping = 0; + mode_lib->vba.DISPCLKWithoutRamping = 0; + mode_lib->vba.GlobalDPPCLK = 0.0; + + // dml_ml->vba.DISPCLK and dml_ml->vba.DPPCLK Calculation + // + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.WritebackEnable[k]) { + mode_lib->vba.WritebackDISPCLK = + dml_max( + mode_lib->vba.WritebackDISPCLK, + CalculateWriteBackDISPCLK( + mode_lib->vba.WritebackPixelFormat[k], + mode_lib->vba.PixelClock[k], + mode_lib->vba.WritebackHRatio[k], + mode_lib->vba.WritebackVRatio[k], + mode_lib->vba.WritebackLumaHTaps[k], + mode_lib->vba.WritebackLumaVTaps[k], + mode_lib->vba.WritebackChromaHTaps[k], + mode_lib->vba.WritebackChromaVTaps[k], + mode_lib->vba.WritebackDestinationWidth[k], + mode_lib->vba.HTotal[k], + mode_lib->vba.WritebackChromaLineBufferWidth)); + } + } + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.HRatio[k] > 1) { + mode_lib->vba.PSCL_THROUGHPUT_LUMA[k] = dml_min( + mode_lib->vba.MaxDCHUBToPSCLThroughput, + mode_lib->vba.MaxPSCLToLBThroughput + * mode_lib->vba.HRatio[k] + / dml_ceil( + mode_lib->vba.htaps[k] + / 6.0, + 1)); + } else { + mode_lib->vba.PSCL_THROUGHPUT_LUMA[k] = dml_min( + mode_lib->vba.MaxDCHUBToPSCLThroughput, + mode_lib->vba.MaxPSCLToLBThroughput); + } + + mode_lib->vba.DPPCLKUsingSingleDPPLuma = + mode_lib->vba.PixelClock[k] + * dml_max( + mode_lib->vba.vtaps[k] / 6.0 + * dml_min( + 1.0, + mode_lib->vba.HRatio[k]), + dml_max( + mode_lib->vba.HRatio[k] + * mode_lib->vba.VRatio[k] + / mode_lib->vba.PSCL_THROUGHPUT_LUMA[k], + 1.0)); + + if ((mode_lib->vba.htaps[k] > 6 || mode_lib->vba.vtaps[k] > 6) + && mode_lib->vba.DPPCLKUsingSingleDPPLuma + < 2 * mode_lib->vba.PixelClock[k]) { + mode_lib->vba.DPPCLKUsingSingleDPPLuma = 2 * mode_lib->vba.PixelClock[k]; + } + + if ((mode_lib->vba.SourcePixelFormat[k] != dm_420_8 + && mode_lib->vba.SourcePixelFormat[k] != dm_420_10)) { + mode_lib->vba.PSCL_THROUGHPUT_CHROMA[k] = 0.0; + mode_lib->vba.DPPCLKUsingSingleDPP[k] = + mode_lib->vba.DPPCLKUsingSingleDPPLuma; + } else { + if (mode_lib->vba.HRatio[k] > 1) { + mode_lib->vba.PSCL_THROUGHPUT_CHROMA[k] = + dml_min( + mode_lib->vba.MaxDCHUBToPSCLThroughput, + mode_lib->vba.MaxPSCLToLBThroughput + * mode_lib->vba.HRatio[k] + / 2 + / dml_ceil( + mode_lib->vba.HTAPsChroma[k] + / 6.0, + 1.0)); + } else { + mode_lib->vba.PSCL_THROUGHPUT_CHROMA[k] = dml_min( + mode_lib->vba.MaxDCHUBToPSCLThroughput, + mode_lib->vba.MaxPSCLToLBThroughput); + } + mode_lib->vba.DPPCLKUsingSingleDPPChroma = + mode_lib->vba.PixelClock[k] + * dml_max( + mode_lib->vba.VTAPsChroma[k] + / 6.0 + * dml_min( + 1.0, + mode_lib->vba.HRatio[k] + / 2), + dml_max( + mode_lib->vba.HRatio[k] + * mode_lib->vba.VRatio[k] + / 4 + / mode_lib->vba.PSCL_THROUGHPUT_CHROMA[k], + 1.0)); + + if ((mode_lib->vba.HTAPsChroma[k] > 6 || mode_lib->vba.VTAPsChroma[k] > 6) + && mode_lib->vba.DPPCLKUsingSingleDPPChroma + < 2 * mode_lib->vba.PixelClock[k]) { + mode_lib->vba.DPPCLKUsingSingleDPPChroma = 2 + * mode_lib->vba.PixelClock[k]; + } + + mode_lib->vba.DPPCLKUsingSingleDPP[k] = dml_max( + mode_lib->vba.DPPCLKUsingSingleDPPLuma, + mode_lib->vba.DPPCLKUsingSingleDPPChroma); + } + } + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.BlendingAndTiming[k] != k) + continue; + if (mode_lib->vba.ODMCombineEnabled[k]) { + mode_lib->vba.DISPCLKWithRamping = + dml_max( + mode_lib->vba.DISPCLKWithRamping, + mode_lib->vba.PixelClock[k] / 2 + * (1 + + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading + / 100) + * (1 + + mode_lib->vba.DISPCLKRampingMargin + / 100)); + mode_lib->vba.DISPCLKWithoutRamping = + dml_max( + mode_lib->vba.DISPCLKWithoutRamping, + mode_lib->vba.PixelClock[k] / 2 + * (1 + + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading + / 100)); + } else if (!mode_lib->vba.ODMCombineEnabled[k]) { + mode_lib->vba.DISPCLKWithRamping = + dml_max( + mode_lib->vba.DISPCLKWithRamping, + mode_lib->vba.PixelClock[k] + * (1 + + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading + / 100) + * (1 + + mode_lib->vba.DISPCLKRampingMargin + / 100)); + mode_lib->vba.DISPCLKWithoutRamping = + dml_max( + mode_lib->vba.DISPCLKWithoutRamping, + mode_lib->vba.PixelClock[k] + * (1 + + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading + / 100)); + } + } + + mode_lib->vba.DISPCLKWithRamping = dml_max( + mode_lib->vba.DISPCLKWithRamping, + mode_lib->vba.WritebackDISPCLK); + mode_lib->vba.DISPCLKWithoutRamping = dml_max( + mode_lib->vba.DISPCLKWithoutRamping, + mode_lib->vba.WritebackDISPCLK); + + ASSERT(mode_lib->vba.DISPCLKDPPCLKVCOSpeed != 0); + mode_lib->vba.DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp( + mode_lib->vba.DISPCLKWithRamping, + mode_lib->vba.DISPCLKDPPCLKVCOSpeed); + mode_lib->vba.DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp( + mode_lib->vba.DISPCLKWithoutRamping, + mode_lib->vba.DISPCLKDPPCLKVCOSpeed); + mode_lib->vba.MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown( + mode_lib->vba.soc.clock_limits[mode_lib->vba.soc.num_states].dispclk_mhz, + mode_lib->vba.DISPCLKDPPCLKVCOSpeed); + if (mode_lib->vba.DISPCLKWithoutRampingRoundedToDFSGranularity + > mode_lib->vba.MaxDispclkRoundedToDFSGranularity) { + mode_lib->vba.DISPCLK_calculated = + mode_lib->vba.DISPCLKWithoutRampingRoundedToDFSGranularity; + } else if (mode_lib->vba.DISPCLKWithRampingRoundedToDFSGranularity + > mode_lib->vba.MaxDispclkRoundedToDFSGranularity) { + mode_lib->vba.DISPCLK_calculated = mode_lib->vba.MaxDispclkRoundedToDFSGranularity; + } else { + mode_lib->vba.DISPCLK_calculated = + mode_lib->vba.DISPCLKWithRampingRoundedToDFSGranularity; + } + DTRACE(" dispclk_mhz (calculated) = %f", mode_lib->vba.DISPCLK_calculated); + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.DPPPerPlane[k] == 0) { + mode_lib->vba.DPPCLK_calculated[k] = 0; + } else { + mode_lib->vba.DPPCLK_calculated[k] = mode_lib->vba.DPPCLKUsingSingleDPP[k] + / mode_lib->vba.DPPPerPlane[k] + * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100); + } + mode_lib->vba.GlobalDPPCLK = dml_max( + mode_lib->vba.GlobalDPPCLK, + mode_lib->vba.DPPCLK_calculated[k]); + } + mode_lib->vba.GlobalDPPCLK = RoundToDFSGranularityUp( + mode_lib->vba.GlobalDPPCLK, + mode_lib->vba.DISPCLKDPPCLKVCOSpeed); + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + mode_lib->vba.DPPCLK_calculated[k] = mode_lib->vba.GlobalDPPCLK / 255 + * dml_ceil( + mode_lib->vba.DPPCLK_calculated[k] * 255 + / mode_lib->vba.GlobalDPPCLK, + 1); + DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, mode_lib->vba.DPPCLK_calculated[k]); + } + + // Urgent Watermark + mode_lib->vba.DCCEnabledAnyPlane = false; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) + if (mode_lib->vba.DCCEnable[k]) + mode_lib->vba.DCCEnabledAnyPlane = true; + + mode_lib->vba.ReturnBandwidthToDCN = dml_min( + mode_lib->vba.ReturnBusWidth * mode_lib->vba.DCFCLK, + mode_lib->vba.FabricAndDRAMBandwidth * 1000) + * mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly / 100; + + mode_lib->vba.ReturnBW = mode_lib->vba.ReturnBandwidthToDCN; + mode_lib->vba.ReturnBW = adjust_ReturnBW( + mode_lib, + mode_lib->vba.ReturnBW, + mode_lib->vba.DCCEnabledAnyPlane, + mode_lib->vba.ReturnBandwidthToDCN); + + // Let's do this calculation again?? + mode_lib->vba.ReturnBandwidthToDCN = dml_min( + mode_lib->vba.ReturnBusWidth * mode_lib->vba.DCFCLK, + mode_lib->vba.FabricAndDRAMBandwidth * 1000); + mode_lib->vba.ReturnBW = adjust_ReturnBW( + mode_lib, + mode_lib->vba.ReturnBW, + mode_lib->vba.DCCEnabledAnyPlane, + mode_lib->vba.ReturnBandwidthToDCN); + + DTRACE(" dcfclk_mhz = %f", mode_lib->vba.DCFCLK); + DTRACE(" return_bw_to_dcn = %f", mode_lib->vba.ReturnBandwidthToDCN); + DTRACE(" return_bus_bw = %f", mode_lib->vba.ReturnBW); + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + bool MainPlaneDoesODMCombine = false; + + if (mode_lib->vba.SourceScan[k] == dm_horz) + mode_lib->vba.SwathWidthSingleDPPY[k] = mode_lib->vba.ViewportWidth[k]; + else + mode_lib->vba.SwathWidthSingleDPPY[k] = mode_lib->vba.ViewportHeight[k]; + + if (mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) + MainPlaneDoesODMCombine = true; + for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) + if (mode_lib->vba.BlendingAndTiming[k] == j + && mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) + MainPlaneDoesODMCombine = true; + + if (MainPlaneDoesODMCombine == true) + mode_lib->vba.SwathWidthY[k] = dml_min( + (double) mode_lib->vba.SwathWidthSingleDPPY[k], + dml_round( + mode_lib->vba.HActive[k] / 2.0 + * mode_lib->vba.HRatio[k])); + else { + if (mode_lib->vba.DPPPerPlane[k] == 0) { + mode_lib->vba.SwathWidthY[k] = 0; + } else { + mode_lib->vba.SwathWidthY[k] = mode_lib->vba.SwathWidthSingleDPPY[k] + / mode_lib->vba.DPPPerPlane[k]; + } + } + } + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.SourcePixelFormat[k] == dm_444_64) { + mode_lib->vba.BytePerPixelDETY[k] = 8; + mode_lib->vba.BytePerPixelDETC[k] = 0; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_32) { + mode_lib->vba.BytePerPixelDETY[k] = 4; + mode_lib->vba.BytePerPixelDETC[k] = 0; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_16) { + mode_lib->vba.BytePerPixelDETY[k] = 2; + mode_lib->vba.BytePerPixelDETC[k] = 0; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_8) { + mode_lib->vba.BytePerPixelDETY[k] = 1; + mode_lib->vba.BytePerPixelDETC[k] = 0; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8) { + mode_lib->vba.BytePerPixelDETY[k] = 1; + mode_lib->vba.BytePerPixelDETC[k] = 2; + } else { // dm_420_10 + mode_lib->vba.BytePerPixelDETY[k] = 4.0 / 3.0; + mode_lib->vba.BytePerPixelDETC[k] = 8.0 / 3.0; + } + } + + mode_lib->vba.TotalDataReadBandwidth = 0.0; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + mode_lib->vba.ReadBandwidthPlaneLuma[k] = mode_lib->vba.SwathWidthSingleDPPY[k] + * dml_ceil(mode_lib->vba.BytePerPixelDETY[k], 1) + / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) + * mode_lib->vba.VRatio[k]; + mode_lib->vba.ReadBandwidthPlaneChroma[k] = mode_lib->vba.SwathWidthSingleDPPY[k] + / 2 * dml_ceil(mode_lib->vba.BytePerPixelDETC[k], 2) + / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) + * mode_lib->vba.VRatio[k] / 2; + DTRACE( + " read_bw[%i] = %fBps", + k, + mode_lib->vba.ReadBandwidthPlaneLuma[k] + + mode_lib->vba.ReadBandwidthPlaneChroma[k]); + mode_lib->vba.TotalDataReadBandwidth += mode_lib->vba.ReadBandwidthPlaneLuma[k] + + mode_lib->vba.ReadBandwidthPlaneChroma[k]; + } + + mode_lib->vba.TotalDCCActiveDPP = 0; + mode_lib->vba.TotalActiveDPP = 0; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + mode_lib->vba.TotalActiveDPP = mode_lib->vba.TotalActiveDPP + + mode_lib->vba.DPPPerPlane[k]; + if (mode_lib->vba.DCCEnable[k]) + mode_lib->vba.TotalDCCActiveDPP = mode_lib->vba.TotalDCCActiveDPP + + mode_lib->vba.DPPPerPlane[k]; + } + + mode_lib->vba.UrgentRoundTripAndOutOfOrderLatency = + (mode_lib->vba.RoundTripPingLatencyCycles + 32) / mode_lib->vba.DCFCLK + + mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelDataOnly + * mode_lib->vba.NumberOfChannels + / mode_lib->vba.ReturnBW; + + mode_lib->vba.LastPixelOfLineExtraWatermark = 0; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.VRatio[k] <= 1.0) + mode_lib->vba.DisplayPipeLineDeliveryTimeLuma[k] = + (double) mode_lib->vba.SwathWidthY[k] + * mode_lib->vba.DPPPerPlane[k] + / mode_lib->vba.HRatio[k] + / mode_lib->vba.PixelClock[k]; + else + mode_lib->vba.DisplayPipeLineDeliveryTimeLuma[k] = + (double) mode_lib->vba.SwathWidthY[k] + / mode_lib->vba.PSCL_THROUGHPUT_LUMA[k] + / mode_lib->vba.DPPCLK[k]; + + if (mode_lib->vba.BytePerPixelDETC[k] == 0) + mode_lib->vba.DisplayPipeLineDeliveryTimeChroma[k] = 0.0; + else if (mode_lib->vba.VRatio[k] / 2.0 <= 1.0) + mode_lib->vba.DisplayPipeLineDeliveryTimeChroma[k] = + mode_lib->vba.SwathWidthY[k] / 2.0 + * mode_lib->vba.DPPPerPlane[k] + / (mode_lib->vba.HRatio[k] / 2.0) + / mode_lib->vba.PixelClock[k]; + else + mode_lib->vba.DisplayPipeLineDeliveryTimeChroma[k] = + mode_lib->vba.SwathWidthY[k] / 2.0 + / mode_lib->vba.PSCL_THROUGHPUT_CHROMA[k] + / mode_lib->vba.DPPCLK[k]; + } + + mode_lib->vba.UrgentExtraLatency = mode_lib->vba.UrgentRoundTripAndOutOfOrderLatency + + (mode_lib->vba.TotalActiveDPP * mode_lib->vba.PixelChunkSizeInKByte + + mode_lib->vba.TotalDCCActiveDPP + * mode_lib->vba.MetaChunkSize) * 1024.0 + / mode_lib->vba.ReturnBW; + + if (mode_lib->vba.GPUVMEnable) + mode_lib->vba.UrgentExtraLatency += mode_lib->vba.TotalActiveDPP + * mode_lib->vba.PTEGroupSize / mode_lib->vba.ReturnBW; + + mode_lib->vba.UrgentWatermark = mode_lib->vba.UrgentLatencyPixelDataOnly + + mode_lib->vba.LastPixelOfLineExtraWatermark + + mode_lib->vba.UrgentExtraLatency; + + DTRACE(" urgent_extra_latency = %fus", mode_lib->vba.UrgentExtraLatency); + DTRACE(" wm_urgent = %fus", mode_lib->vba.UrgentWatermark); + + mode_lib->vba.UrgentLatency = mode_lib->vba.UrgentLatencyPixelDataOnly; + + mode_lib->vba.TotalActiveWriteback = 0; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.WritebackEnable[k]) + mode_lib->vba.TotalActiveWriteback = mode_lib->vba.TotalActiveWriteback + mode_lib->vba.ActiveWritebacksPerPlane[k]; + } + + if (mode_lib->vba.TotalActiveWriteback <= 1) + mode_lib->vba.WritebackUrgentWatermark = mode_lib->vba.WritebackLatency; + else + mode_lib->vba.WritebackUrgentWatermark = mode_lib->vba.WritebackLatency + + mode_lib->vba.WritebackChunkSize * 1024.0 / 32 + / mode_lib->vba.SOCCLK; + + DTRACE(" wm_wb_urgent = %fus", mode_lib->vba.WritebackUrgentWatermark); + + // NB P-State/DRAM Clock Change Watermark + mode_lib->vba.DRAMClockChangeWatermark = mode_lib->vba.DRAMClockChangeLatency + + mode_lib->vba.UrgentWatermark; + + DTRACE(" wm_pstate_change = %fus", mode_lib->vba.DRAMClockChangeWatermark); + + DTRACE(" calculating wb pstate watermark"); + DTRACE(" total wb outputs %d", mode_lib->vba.TotalActiveWriteback); + DTRACE(" socclk frequency %f Mhz", mode_lib->vba.SOCCLK); + + if (mode_lib->vba.TotalActiveWriteback <= 1) + mode_lib->vba.WritebackDRAMClockChangeWatermark = + mode_lib->vba.DRAMClockChangeLatency + + mode_lib->vba.WritebackLatency; + else + mode_lib->vba.WritebackDRAMClockChangeWatermark = + mode_lib->vba.DRAMClockChangeLatency + + mode_lib->vba.WritebackLatency + + mode_lib->vba.WritebackChunkSize * 1024.0 / 32 + / mode_lib->vba.SOCCLK; + + DTRACE(" wm_wb_pstate %fus", mode_lib->vba.WritebackDRAMClockChangeWatermark); + + // Stutter Efficiency + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + mode_lib->vba.LinesInDETY[k] = mode_lib->vba.DETBufferSizeY[k] + / mode_lib->vba.BytePerPixelDETY[k] / mode_lib->vba.SwathWidthY[k]; + mode_lib->vba.LinesInDETYRoundedDownToSwath[k] = dml_floor( + mode_lib->vba.LinesInDETY[k], + mode_lib->vba.SwathHeightY[k]); + mode_lib->vba.FullDETBufferingTimeY[k] = + mode_lib->vba.LinesInDETYRoundedDownToSwath[k] + * (mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]) + / mode_lib->vba.VRatio[k]; + if (mode_lib->vba.BytePerPixelDETC[k] > 0) { + mode_lib->vba.LinesInDETC[k] = mode_lib->vba.DETBufferSizeC[k] + / mode_lib->vba.BytePerPixelDETC[k] + / (mode_lib->vba.SwathWidthY[k] / 2); + mode_lib->vba.LinesInDETCRoundedDownToSwath[k] = dml_floor( + mode_lib->vba.LinesInDETC[k], + mode_lib->vba.SwathHeightC[k]); + mode_lib->vba.FullDETBufferingTimeC[k] = + mode_lib->vba.LinesInDETCRoundedDownToSwath[k] + * (mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]) + / (mode_lib->vba.VRatio[k] / 2); + } else { + mode_lib->vba.LinesInDETC[k] = 0; + mode_lib->vba.LinesInDETCRoundedDownToSwath[k] = 0; + mode_lib->vba.FullDETBufferingTimeC[k] = 999999; + } + } + + mode_lib->vba.MinFullDETBufferingTime = 999999.0; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.FullDETBufferingTimeY[k] + < mode_lib->vba.MinFullDETBufferingTime) { + mode_lib->vba.MinFullDETBufferingTime = + mode_lib->vba.FullDETBufferingTimeY[k]; + mode_lib->vba.FrameTimeForMinFullDETBufferingTime = + (double) mode_lib->vba.VTotal[k] * mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]; + } + if (mode_lib->vba.FullDETBufferingTimeC[k] + < mode_lib->vba.MinFullDETBufferingTime) { + mode_lib->vba.MinFullDETBufferingTime = + mode_lib->vba.FullDETBufferingTimeC[k]; + mode_lib->vba.FrameTimeForMinFullDETBufferingTime = + (double) mode_lib->vba.VTotal[k] * mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]; + } + } + + mode_lib->vba.AverageReadBandwidthGBytePerSecond = 0.0; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.DCCEnable[k]) { + mode_lib->vba.AverageReadBandwidthGBytePerSecond = + mode_lib->vba.AverageReadBandwidthGBytePerSecond + + mode_lib->vba.ReadBandwidthPlaneLuma[k] + / mode_lib->vba.DCCRate[k] + / 1000 + + mode_lib->vba.ReadBandwidthPlaneChroma[k] + / mode_lib->vba.DCCRate[k] + / 1000; + } else { + mode_lib->vba.AverageReadBandwidthGBytePerSecond = + mode_lib->vba.AverageReadBandwidthGBytePerSecond + + mode_lib->vba.ReadBandwidthPlaneLuma[k] + / 1000 + + mode_lib->vba.ReadBandwidthPlaneChroma[k] + / 1000; + } + if (mode_lib->vba.DCCEnable[k]) { + mode_lib->vba.AverageReadBandwidthGBytePerSecond = + mode_lib->vba.AverageReadBandwidthGBytePerSecond + + mode_lib->vba.ReadBandwidthPlaneLuma[k] + / 1000 / 256 + + mode_lib->vba.ReadBandwidthPlaneChroma[k] + / 1000 / 256; + } + if (mode_lib->vba.GPUVMEnable) { + mode_lib->vba.AverageReadBandwidthGBytePerSecond = + mode_lib->vba.AverageReadBandwidthGBytePerSecond + + mode_lib->vba.ReadBandwidthPlaneLuma[k] + / 1000 / 512 + + mode_lib->vba.ReadBandwidthPlaneChroma[k] + / 1000 / 512; + } + } + + mode_lib->vba.PartOfBurstThatFitsInROB = + dml_min( + mode_lib->vba.MinFullDETBufferingTime + * mode_lib->vba.TotalDataReadBandwidth, + mode_lib->vba.ROBBufferSizeInKByte * 1024 + * mode_lib->vba.TotalDataReadBandwidth + / (mode_lib->vba.AverageReadBandwidthGBytePerSecond + * 1000)); + mode_lib->vba.StutterBurstTime = mode_lib->vba.PartOfBurstThatFitsInROB + * (mode_lib->vba.AverageReadBandwidthGBytePerSecond * 1000) + / mode_lib->vba.TotalDataReadBandwidth / mode_lib->vba.ReturnBW + + (mode_lib->vba.MinFullDETBufferingTime + * mode_lib->vba.TotalDataReadBandwidth + - mode_lib->vba.PartOfBurstThatFitsInROB) + / (mode_lib->vba.DCFCLK * 64); + if (mode_lib->vba.TotalActiveWriteback == 0) { + mode_lib->vba.StutterEfficiencyNotIncludingVBlank = (1 + - (mode_lib->vba.SRExitTime + mode_lib->vba.StutterBurstTime) + / mode_lib->vba.MinFullDETBufferingTime) * 100; + } else { + mode_lib->vba.StutterEfficiencyNotIncludingVBlank = 0; + } + + mode_lib->vba.SmallestVBlank = 999999; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.SynchronizedVBlank || mode_lib->vba.NumberOfActivePlanes == 1) { + mode_lib->vba.VBlankTime = (double) (mode_lib->vba.VTotal[k] + - mode_lib->vba.VActive[k]) * mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]; + } else { + mode_lib->vba.VBlankTime = 0; + } + mode_lib->vba.SmallestVBlank = dml_min( + mode_lib->vba.SmallestVBlank, + mode_lib->vba.VBlankTime); + } + + mode_lib->vba.StutterEfficiency = (mode_lib->vba.StutterEfficiencyNotIncludingVBlank / 100 + * (mode_lib->vba.FrameTimeForMinFullDETBufferingTime + - mode_lib->vba.SmallestVBlank) + + mode_lib->vba.SmallestVBlank) + / mode_lib->vba.FrameTimeForMinFullDETBufferingTime * 100; + + // dml_ml->vba.DCFCLK Deep Sleep + mode_lib->vba.DCFCLKDeepSleep = 8.0; + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; k++) { + if (mode_lib->vba.BytePerPixelDETC[k] > 0) { + mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = + dml_max( + 1.1 * mode_lib->vba.SwathWidthY[k] + * dml_ceil( + mode_lib->vba.BytePerPixelDETY[k], + 1) / 32 + / mode_lib->vba.DisplayPipeLineDeliveryTimeLuma[k], + 1.1 * mode_lib->vba.SwathWidthY[k] / 2.0 + * dml_ceil( + mode_lib->vba.BytePerPixelDETC[k], + 2) / 32 + / mode_lib->vba.DisplayPipeLineDeliveryTimeChroma[k]); + } else + mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = 1.1 * mode_lib->vba.SwathWidthY[k] + * dml_ceil(mode_lib->vba.BytePerPixelDETY[k], 1) / 64.0 + / mode_lib->vba.DisplayPipeLineDeliveryTimeLuma[k]; + mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = dml_max( + mode_lib->vba.DCFCLKDeepSleepPerPlane[k], + mode_lib->vba.PixelClock[k] / 16.0); + mode_lib->vba.DCFCLKDeepSleep = dml_max( + mode_lib->vba.DCFCLKDeepSleep, + mode_lib->vba.DCFCLKDeepSleepPerPlane[k]); + + DTRACE( + " dcfclk_deepsleep_per_plane[%i] = %fMHz", + k, + mode_lib->vba.DCFCLKDeepSleepPerPlane[k]); + } + + DTRACE(" dcfclk_deepsleep_mhz = %fMHz", mode_lib->vba.DCFCLKDeepSleep); + + // Stutter Watermark + mode_lib->vba.StutterExitWatermark = mode_lib->vba.SRExitTime + + mode_lib->vba.LastPixelOfLineExtraWatermark + + mode_lib->vba.UrgentExtraLatency + 10 / mode_lib->vba.DCFCLKDeepSleep; + mode_lib->vba.StutterEnterPlusExitWatermark = mode_lib->vba.SREnterPlusExitTime + + mode_lib->vba.LastPixelOfLineExtraWatermark + + mode_lib->vba.UrgentExtraLatency; + + DTRACE(" wm_cstate_exit = %fus", mode_lib->vba.StutterExitWatermark); + DTRACE(" wm_cstate_enter_exit = %fus", mode_lib->vba.StutterEnterPlusExitWatermark); + + // Urgent Latency Supported + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + mode_lib->vba.EffectiveDETPlusLBLinesLuma = + dml_floor( + mode_lib->vba.LinesInDETY[k] + + dml_min( + mode_lib->vba.LinesInDETY[k] + * mode_lib->vba.DPPCLK[k] + * mode_lib->vba.BytePerPixelDETY[k] + * mode_lib->vba.PSCL_THROUGHPUT_LUMA[k] + / (mode_lib->vba.ReturnBW + / mode_lib->vba.DPPPerPlane[k]), + (double) mode_lib->vba.EffectiveLBLatencyHidingSourceLinesLuma), + mode_lib->vba.SwathHeightY[k]); + + mode_lib->vba.UrgentLatencySupportUsLuma = mode_lib->vba.EffectiveDETPlusLBLinesLuma + * (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) + / mode_lib->vba.VRatio[k] + - mode_lib->vba.EffectiveDETPlusLBLinesLuma + * mode_lib->vba.SwathWidthY[k] + * mode_lib->vba.BytePerPixelDETY[k] + / (mode_lib->vba.ReturnBW + / mode_lib->vba.DPPPerPlane[k]); + + if (mode_lib->vba.BytePerPixelDETC[k] > 0) { + mode_lib->vba.EffectiveDETPlusLBLinesChroma = + dml_floor( + mode_lib->vba.LinesInDETC[k] + + dml_min( + mode_lib->vba.LinesInDETC[k] + * mode_lib->vba.DPPCLK[k] + * mode_lib->vba.BytePerPixelDETC[k] + * mode_lib->vba.PSCL_THROUGHPUT_CHROMA[k] + / (mode_lib->vba.ReturnBW + / mode_lib->vba.DPPPerPlane[k]), + (double) mode_lib->vba.EffectiveLBLatencyHidingSourceLinesChroma), + mode_lib->vba.SwathHeightC[k]); + mode_lib->vba.UrgentLatencySupportUsChroma = + mode_lib->vba.EffectiveDETPlusLBLinesChroma + * (mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]) + / (mode_lib->vba.VRatio[k] / 2) + - mode_lib->vba.EffectiveDETPlusLBLinesChroma + * (mode_lib->vba.SwathWidthY[k] + / 2) + * mode_lib->vba.BytePerPixelDETC[k] + / (mode_lib->vba.ReturnBW + / mode_lib->vba.DPPPerPlane[k]); + mode_lib->vba.UrgentLatencySupportUs[k] = dml_min( + mode_lib->vba.UrgentLatencySupportUsLuma, + mode_lib->vba.UrgentLatencySupportUsChroma); + } else { + mode_lib->vba.UrgentLatencySupportUs[k] = + mode_lib->vba.UrgentLatencySupportUsLuma; + } + } + + mode_lib->vba.MinUrgentLatencySupportUs = 999999; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + mode_lib->vba.MinUrgentLatencySupportUs = dml_min( + mode_lib->vba.MinUrgentLatencySupportUs, + mode_lib->vba.UrgentLatencySupportUs[k]); + } + + // Non-Urgent Latency Tolerance + mode_lib->vba.NonUrgentLatencyTolerance = mode_lib->vba.MinUrgentLatencySupportUs + - mode_lib->vba.UrgentWatermark; + + // DSCCLK + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if ((mode_lib->vba.BlendingAndTiming[k] != k) || !mode_lib->vba.DSCEnabled[k]) { + mode_lib->vba.DSCCLK_calculated[k] = 0.0; + } else { + if (mode_lib->vba.OutputFormat[k] == dm_420 + || mode_lib->vba.OutputFormat[k] == dm_n422) + mode_lib->vba.DSCFormatFactor = 2; + else + mode_lib->vba.DSCFormatFactor = 1; + if (mode_lib->vba.ODMCombineEnabled[k]) + mode_lib->vba.DSCCLK_calculated[k] = + mode_lib->vba.PixelClockBackEnd[k] / 6 + / mode_lib->vba.DSCFormatFactor + / (1 + - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading + / 100); + else + mode_lib->vba.DSCCLK_calculated[k] = + mode_lib->vba.PixelClockBackEnd[k] / 3 + / mode_lib->vba.DSCFormatFactor + / (1 + - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading + / 100); + } + } + + // DSC Delay + // TODO + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + double bpp = mode_lib->vba.OutputBpp[k]; + unsigned int slices = mode_lib->vba.NumberOfDSCSlices[k]; + + if (mode_lib->vba.DSCEnabled[k] && bpp != 0) { + if (!mode_lib->vba.ODMCombineEnabled[k]) { + mode_lib->vba.DSCDelay[k] = + dscceComputeDelay( + mode_lib->vba.DSCInputBitPerComponent[k], + bpp, + dml_ceil( + (double) mode_lib->vba.HActive[k] + / mode_lib->vba.NumberOfDSCSlices[k], + 1), + slices, + mode_lib->vba.OutputFormat[k]) + + dscComputeDelay( + mode_lib->vba.OutputFormat[k]); + } else { + mode_lib->vba.DSCDelay[k] = + 2 + * (dscceComputeDelay( + mode_lib->vba.DSCInputBitPerComponent[k], + bpp, + dml_ceil( + (double) mode_lib->vba.HActive[k] + / mode_lib->vba.NumberOfDSCSlices[k], + 1), + slices / 2.0, + mode_lib->vba.OutputFormat[k]) + + dscComputeDelay( + mode_lib->vba.OutputFormat[k])); + } + mode_lib->vba.DSCDelay[k] = mode_lib->vba.DSCDelay[k] + * mode_lib->vba.PixelClock[k] + / mode_lib->vba.PixelClockBackEnd[k]; + } else { + mode_lib->vba.DSCDelay[k] = 0; + } + } + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) + for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) // NumberOfPlanes + if (j != k && mode_lib->vba.BlendingAndTiming[k] == j + && mode_lib->vba.DSCEnabled[j]) + mode_lib->vba.DSCDelay[k] = mode_lib->vba.DSCDelay[j]; + + // Prefetch + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + unsigned int PDEAndMetaPTEBytesFrameY; + unsigned int PixelPTEBytesPerRowY; + unsigned int MetaRowByteY; + unsigned int MetaRowByteC; + unsigned int PDEAndMetaPTEBytesFrameC; + unsigned int PixelPTEBytesPerRowC; + + Calculate256BBlockSizes( + mode_lib->vba.SourcePixelFormat[k], + mode_lib->vba.SurfaceTiling[k], + dml_ceil(mode_lib->vba.BytePerPixelDETY[k], 1), + dml_ceil(mode_lib->vba.BytePerPixelDETC[k], 2), + &mode_lib->vba.BlockHeight256BytesY[k], + &mode_lib->vba.BlockHeight256BytesC[k], + &mode_lib->vba.BlockWidth256BytesY[k], + &mode_lib->vba.BlockWidth256BytesC[k]); + PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes( + mode_lib, + mode_lib->vba.DCCEnable[k], + mode_lib->vba.BlockHeight256BytesY[k], + mode_lib->vba.BlockWidth256BytesY[k], + mode_lib->vba.SourcePixelFormat[k], + mode_lib->vba.SurfaceTiling[k], + dml_ceil(mode_lib->vba.BytePerPixelDETY[k], 1), + mode_lib->vba.SourceScan[k], + mode_lib->vba.ViewportWidth[k], + mode_lib->vba.ViewportHeight[k], + mode_lib->vba.SwathWidthY[k], + mode_lib->vba.GPUVMEnable, + mode_lib->vba.VMMPageSize, + mode_lib->vba.PTEBufferSizeInRequestsLuma, + mode_lib->vba.PDEProcessingBufIn64KBReqs, + mode_lib->vba.PitchY[k], + mode_lib->vba.DCCMetaPitchY[k], + &mode_lib->vba.MacroTileWidthY[k], + &MetaRowByteY, + &PixelPTEBytesPerRowY, + &mode_lib->vba.PTEBufferSizeNotExceeded[mode_lib->vba.VoltageLevel][0], + &mode_lib->vba.dpte_row_height[k], + &mode_lib->vba.meta_row_height[k]); + mode_lib->vba.PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines( + mode_lib, + mode_lib->vba.VRatio[k], + mode_lib->vba.vtaps[k], + mode_lib->vba.Interlace[k], + mode_lib->vba.ProgressiveToInterlaceUnitInOPP, + mode_lib->vba.SwathHeightY[k], + mode_lib->vba.ViewportYStartY[k], + &mode_lib->vba.VInitPreFillY[k], + &mode_lib->vba.MaxNumSwathY[k]); + + if ((mode_lib->vba.SourcePixelFormat[k] != dm_444_64 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_32 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_16 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_8)) { + PDEAndMetaPTEBytesFrameC = + CalculateVMAndRowBytes( + mode_lib, + mode_lib->vba.DCCEnable[k], + mode_lib->vba.BlockHeight256BytesC[k], + mode_lib->vba.BlockWidth256BytesC[k], + mode_lib->vba.SourcePixelFormat[k], + mode_lib->vba.SurfaceTiling[k], + dml_ceil( + mode_lib->vba.BytePerPixelDETC[k], + 2), + mode_lib->vba.SourceScan[k], + mode_lib->vba.ViewportWidth[k] / 2, + mode_lib->vba.ViewportHeight[k] / 2, + mode_lib->vba.SwathWidthY[k] / 2, + mode_lib->vba.GPUVMEnable, + mode_lib->vba.VMMPageSize, + mode_lib->vba.PTEBufferSizeInRequestsLuma, + mode_lib->vba.PDEProcessingBufIn64KBReqs, + mode_lib->vba.PitchC[k], + 0, + &mode_lib->vba.MacroTileWidthC[k], + &MetaRowByteC, + &PixelPTEBytesPerRowC, + &mode_lib->vba.PTEBufferSizeNotExceeded[mode_lib->vba.VoltageLevel][0], + &mode_lib->vba.dpte_row_height_chroma[k], + &mode_lib->vba.meta_row_height_chroma[k]); + mode_lib->vba.PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines( + mode_lib, + mode_lib->vba.VRatio[k] / 2, + mode_lib->vba.VTAPsChroma[k], + mode_lib->vba.Interlace[k], + mode_lib->vba.ProgressiveToInterlaceUnitInOPP, + mode_lib->vba.SwathHeightC[k], + mode_lib->vba.ViewportYStartC[k], + &mode_lib->vba.VInitPreFillC[k], + &mode_lib->vba.MaxNumSwathC[k]); + } else { + PixelPTEBytesPerRowC = 0; + PDEAndMetaPTEBytesFrameC = 0; + MetaRowByteC = 0; + mode_lib->vba.MaxNumSwathC[k] = 0; + mode_lib->vba.PrefetchSourceLinesC[k] = 0; + } + + mode_lib->vba.PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC; + mode_lib->vba.PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + + PDEAndMetaPTEBytesFrameC; + mode_lib->vba.MetaRowByte[k] = MetaRowByteY + MetaRowByteC; + + CalculateActiveRowBandwidth( + mode_lib->vba.GPUVMEnable, + mode_lib->vba.SourcePixelFormat[k], + mode_lib->vba.VRatio[k], + mode_lib->vba.DCCEnable[k], + mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k], + MetaRowByteY, + MetaRowByteC, + mode_lib->vba.meta_row_height[k], + mode_lib->vba.meta_row_height_chroma[k], + PixelPTEBytesPerRowY, + PixelPTEBytesPerRowC, + mode_lib->vba.dpte_row_height[k], + mode_lib->vba.dpte_row_height_chroma[k], + &mode_lib->vba.meta_row_bw[k], + &mode_lib->vba.dpte_row_bw[k], + &mode_lib->vba.qual_row_bw[k]); + } + + mode_lib->vba.TCalc = 24.0 / mode_lib->vba.DCFCLKDeepSleep; + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.BlendingAndTiming[k] == k) { + if (mode_lib->vba.WritebackEnable[k] == true) { + mode_lib->vba.WritebackDelay[mode_lib->vba.VoltageLevel][k] = + mode_lib->vba.WritebackLatency + + CalculateWriteBackDelay( + mode_lib->vba.WritebackPixelFormat[k], + mode_lib->vba.WritebackHRatio[k], + mode_lib->vba.WritebackVRatio[k], + mode_lib->vba.WritebackLumaHTaps[k], + mode_lib->vba.WritebackLumaVTaps[k], + mode_lib->vba.WritebackChromaHTaps[k], + mode_lib->vba.WritebackChromaVTaps[k], + mode_lib->vba.WritebackDestinationWidth[k]) + / mode_lib->vba.DISPCLK; + } else + mode_lib->vba.WritebackDelay[mode_lib->vba.VoltageLevel][k] = 0; + for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) { + if (mode_lib->vba.BlendingAndTiming[j] == k + && mode_lib->vba.WritebackEnable[j] == true) { + mode_lib->vba.WritebackDelay[mode_lib->vba.VoltageLevel][k] = + dml_max( + mode_lib->vba.WritebackDelay[mode_lib->vba.VoltageLevel][k], + mode_lib->vba.WritebackLatency + + CalculateWriteBackDelay( + mode_lib->vba.WritebackPixelFormat[j], + mode_lib->vba.WritebackHRatio[j], + mode_lib->vba.WritebackVRatio[j], + mode_lib->vba.WritebackLumaHTaps[j], + mode_lib->vba.WritebackLumaVTaps[j], + mode_lib->vba.WritebackChromaHTaps[j], + mode_lib->vba.WritebackChromaVTaps[j], + mode_lib->vba.WritebackDestinationWidth[j]) + / mode_lib->vba.DISPCLK); + } + } + } + } + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) + for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) + if (mode_lib->vba.BlendingAndTiming[k] == j) + mode_lib->vba.WritebackDelay[mode_lib->vba.VoltageLevel][k] = + mode_lib->vba.WritebackDelay[mode_lib->vba.VoltageLevel][j]; + + mode_lib->vba.VStartupLines = 13; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + mode_lib->vba.MaxVStartupLines[k] = + mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k] + - dml_max( + 1.0, + dml_ceil( + mode_lib->vba.WritebackDelay[mode_lib->vba.VoltageLevel][k] + / (mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]), + 1)); + } + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) + mode_lib->vba.MaximumMaxVStartupLines = dml_max( + mode_lib->vba.MaximumMaxVStartupLines, + mode_lib->vba.MaxVStartupLines[k]); + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + mode_lib->vba.cursor_bw[k] = 0.0; + for (j = 0; j < mode_lib->vba.NumberOfCursors[k]; ++j) + mode_lib->vba.cursor_bw[k] += mode_lib->vba.CursorWidth[k][j] + * mode_lib->vba.CursorBPP[k][j] / 8.0 + / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) + * mode_lib->vba.VRatio[k]; + } + + do { + double MaxTotalRDBandwidth = 0; + bool DestinationLineTimesForPrefetchLessThan2 = false; + bool VRatioPrefetchMoreThan4 = false; + bool prefetch_vm_bw_valid = true; + bool prefetch_row_bw_valid = true; + double TWait = CalculateTWait( + mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb], + mode_lib->vba.DRAMClockChangeLatency, + mode_lib->vba.UrgentLatencyPixelDataOnly, + mode_lib->vba.SREnterPlusExitTime); + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.XFCEnabled[k] == true) { + mode_lib->vba.XFCRemoteSurfaceFlipDelay = + CalculateRemoteSurfaceFlipDelay( + mode_lib, + mode_lib->vba.VRatio[k], + mode_lib->vba.SwathWidthY[k], + dml_ceil( + mode_lib->vba.BytePerPixelDETY[k], + 1), + mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k], + mode_lib->vba.XFCTSlvVupdateOffset, + mode_lib->vba.XFCTSlvVupdateWidth, + mode_lib->vba.XFCTSlvVreadyOffset, + mode_lib->vba.XFCXBUFLatencyTolerance, + mode_lib->vba.XFCFillBWOverhead, + mode_lib->vba.XFCSlvChunkSize, + mode_lib->vba.XFCBusTransportTime, + mode_lib->vba.TCalc, + TWait, + &mode_lib->vba.SrcActiveDrainRate, + &mode_lib->vba.TInitXFill, + &mode_lib->vba.TslvChk); + } else { + mode_lib->vba.XFCRemoteSurfaceFlipDelay = 0; + } + + CalculateDelayAfterScaler(mode_lib, mode_lib->vba.ReturnBW, mode_lib->vba.ReadBandwidthPlaneLuma[k], mode_lib->vba.ReadBandwidthPlaneChroma[k], mode_lib->vba.TotalDataReadBandwidth, + mode_lib->vba.DisplayPipeLineDeliveryTimeLuma[k], mode_lib->vba.DisplayPipeLineDeliveryTimeChroma[k], + mode_lib->vba.DPPCLK[k], mode_lib->vba.DISPCLK, mode_lib->vba.PixelClock[k], mode_lib->vba.DSCDelay[k], mode_lib->vba.DPPPerPlane[k], mode_lib->vba.ScalerEnabled[k], mode_lib->vba.NumberOfCursors[k], + mode_lib->vba.DPPCLKDelaySubtotal, mode_lib->vba.DPPCLKDelaySCL, mode_lib->vba.DPPCLKDelaySCLLBOnly, mode_lib->vba.DPPCLKDelayCNVCFormater, mode_lib->vba.DPPCLKDelayCNVCCursor, mode_lib->vba.DISPCLKDelaySubtotal, + mode_lib->vba.SwathWidthY[k] / mode_lib->vba.HRatio[k], mode_lib->vba.OutputFormat[k], mode_lib->vba.HTotal[k], + mode_lib->vba.SwathWidthSingleDPPY[k], mode_lib->vba.BytePerPixelDETY[k], mode_lib->vba.BytePerPixelDETC[k], mode_lib->vba.SwathHeightY[k], mode_lib->vba.SwathHeightC[k], mode_lib->vba.Interlace[k], + mode_lib->vba.ProgressiveToInterlaceUnitInOPP, &mode_lib->vba.DSTXAfterScaler[k], &mode_lib->vba.DSTYAfterScaler[k]); + + mode_lib->vba.ErrorResult[k] = + CalculatePrefetchSchedule( + mode_lib, + mode_lib->vba.DPPCLK[k], + mode_lib->vba.DISPCLK, + mode_lib->vba.PixelClock[k], + mode_lib->vba.DCFCLKDeepSleep, + mode_lib->vba.DPPPerPlane[k], + mode_lib->vba.NumberOfCursors[k], + mode_lib->vba.VTotal[k] + - mode_lib->vba.VActive[k], + mode_lib->vba.HTotal[k], + mode_lib->vba.MaxInterDCNTileRepeaters, + dml_min( + mode_lib->vba.VStartupLines, + mode_lib->vba.MaxVStartupLines[k]), + mode_lib->vba.GPUVMMaxPageTableLevels, + mode_lib->vba.GPUVMEnable, + mode_lib->vba.DynamicMetadataEnable[k], + mode_lib->vba.DynamicMetadataLinesBeforeActiveRequired[k], + mode_lib->vba.DynamicMetadataTransmittedBytes[k], + mode_lib->vba.DCCEnable[k], + mode_lib->vba.UrgentLatencyPixelDataOnly, + mode_lib->vba.UrgentExtraLatency, + mode_lib->vba.TCalc, + mode_lib->vba.PDEAndMetaPTEBytesFrame[k], + mode_lib->vba.MetaRowByte[k], + mode_lib->vba.PixelPTEBytesPerRow[k], + mode_lib->vba.PrefetchSourceLinesY[k], + mode_lib->vba.SwathWidthY[k], + mode_lib->vba.BytePerPixelDETY[k], + mode_lib->vba.VInitPreFillY[k], + mode_lib->vba.MaxNumSwathY[k], + mode_lib->vba.PrefetchSourceLinesC[k], + mode_lib->vba.BytePerPixelDETC[k], + mode_lib->vba.VInitPreFillC[k], + mode_lib->vba.MaxNumSwathC[k], + mode_lib->vba.SwathHeightY[k], + mode_lib->vba.SwathHeightC[k], + TWait, + mode_lib->vba.XFCEnabled[k], + mode_lib->vba.XFCRemoteSurfaceFlipDelay, + mode_lib->vba.Interlace[k], + mode_lib->vba.ProgressiveToInterlaceUnitInOPP, + mode_lib->vba.DSTXAfterScaler[k], + mode_lib->vba.DSTYAfterScaler[k], + &mode_lib->vba.DestinationLinesForPrefetch[k], + &mode_lib->vba.PrefetchBandwidth[k], + &mode_lib->vba.DestinationLinesToRequestVMInVBlank[k], + &mode_lib->vba.DestinationLinesToRequestRowInVBlank[k], + &mode_lib->vba.VRatioPrefetchY[k], + &mode_lib->vba.VRatioPrefetchC[k], + &mode_lib->vba.RequiredPrefetchPixDataBWLuma[k], + &mode_lib->vba.Tno_bw[k], + &mode_lib->vba.VUpdateOffsetPix[k], + &mode_lib->vba.VUpdateWidthPix[k], + &mode_lib->vba.VReadyOffsetPix[k]); + + if (mode_lib->vba.BlendingAndTiming[k] == k) { + mode_lib->vba.VStartup[k] = dml_min( + mode_lib->vba.VStartupLines, + mode_lib->vba.MaxVStartupLines[k]); + if (mode_lib->vba.VStartupRequiredWhenNotEnoughTimeForDynamicMetadata + != 0) { + mode_lib->vba.VStartup[k] = + mode_lib->vba.VStartupRequiredWhenNotEnoughTimeForDynamicMetadata; + } + } else { + mode_lib->vba.VStartup[k] = + dml_min( + mode_lib->vba.VStartupLines, + mode_lib->vba.MaxVStartupLines[mode_lib->vba.BlendingAndTiming[k]]); + } + } + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + + if (mode_lib->vba.PDEAndMetaPTEBytesFrame[k] == 0) + mode_lib->vba.prefetch_vm_bw[k] = 0; + else if (mode_lib->vba.DestinationLinesToRequestVMInVBlank[k] > 0) { + mode_lib->vba.prefetch_vm_bw[k] = + (double) mode_lib->vba.PDEAndMetaPTEBytesFrame[k] + / (mode_lib->vba.DestinationLinesToRequestVMInVBlank[k] + * mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]); + } else { + mode_lib->vba.prefetch_vm_bw[k] = 0; + prefetch_vm_bw_valid = false; + } + if (mode_lib->vba.MetaRowByte[k] + mode_lib->vba.PixelPTEBytesPerRow[k] + == 0) + mode_lib->vba.prefetch_row_bw[k] = 0; + else if (mode_lib->vba.DestinationLinesToRequestRowInVBlank[k] > 0) { + mode_lib->vba.prefetch_row_bw[k] = + (double) (mode_lib->vba.MetaRowByte[k] + + mode_lib->vba.PixelPTEBytesPerRow[k]) + / (mode_lib->vba.DestinationLinesToRequestRowInVBlank[k] + * mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]); + } else { + mode_lib->vba.prefetch_row_bw[k] = 0; + prefetch_row_bw_valid = false; + } + + MaxTotalRDBandwidth = + MaxTotalRDBandwidth + mode_lib->vba.cursor_bw[k] + + dml_max( + mode_lib->vba.prefetch_vm_bw[k], + dml_max( + mode_lib->vba.prefetch_row_bw[k], + dml_max( + mode_lib->vba.ReadBandwidthPlaneLuma[k] + + mode_lib->vba.ReadBandwidthPlaneChroma[k], + mode_lib->vba.RequiredPrefetchPixDataBWLuma[k]) + + mode_lib->vba.meta_row_bw[k] + + mode_lib->vba.dpte_row_bw[k])); + + if (mode_lib->vba.DestinationLinesForPrefetch[k] < 2) + DestinationLineTimesForPrefetchLessThan2 = true; + if (mode_lib->vba.VRatioPrefetchY[k] > 4 + || mode_lib->vba.VRatioPrefetchC[k] > 4) + VRatioPrefetchMoreThan4 = true; + } + + if (MaxTotalRDBandwidth <= mode_lib->vba.ReturnBW && prefetch_vm_bw_valid + && prefetch_row_bw_valid && !VRatioPrefetchMoreThan4 + && !DestinationLineTimesForPrefetchLessThan2) + mode_lib->vba.PrefetchModeSupported = true; + else { + mode_lib->vba.PrefetchModeSupported = false; + dml_print( + "DML: CalculatePrefetchSchedule ***failed***. Bandwidth violation. Results are NOT valid\n"); + } + + if (mode_lib->vba.PrefetchModeSupported == true) { + double final_flip_bw[DC__NUM_DPP__MAX]; + unsigned int ImmediateFlipBytes[DC__NUM_DPP__MAX]; + double total_dcn_read_bw_with_flip = 0; + + mode_lib->vba.BandwidthAvailableForImmediateFlip = mode_lib->vba.ReturnBW; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + mode_lib->vba.BandwidthAvailableForImmediateFlip = + mode_lib->vba.BandwidthAvailableForImmediateFlip + - mode_lib->vba.cursor_bw[k] + - dml_max( + mode_lib->vba.ReadBandwidthPlaneLuma[k] + + mode_lib->vba.ReadBandwidthPlaneChroma[k] + + mode_lib->vba.qual_row_bw[k], + mode_lib->vba.PrefetchBandwidth[k]); + } + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + ImmediateFlipBytes[k] = 0; + if ((mode_lib->vba.SourcePixelFormat[k] != dm_420_8 + && mode_lib->vba.SourcePixelFormat[k] != dm_420_10)) { + ImmediateFlipBytes[k] = + mode_lib->vba.PDEAndMetaPTEBytesFrame[k] + + mode_lib->vba.MetaRowByte[k] + + mode_lib->vba.PixelPTEBytesPerRow[k]; + } + } + mode_lib->vba.TotImmediateFlipBytes = 0; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if ((mode_lib->vba.SourcePixelFormat[k] != dm_420_8 + && mode_lib->vba.SourcePixelFormat[k] != dm_420_10)) { + mode_lib->vba.TotImmediateFlipBytes = + mode_lib->vba.TotImmediateFlipBytes + + ImmediateFlipBytes[k]; + } + } + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + CalculateFlipSchedule( + mode_lib, + mode_lib->vba.UrgentExtraLatency, + mode_lib->vba.UrgentLatencyPixelDataOnly, + mode_lib->vba.GPUVMMaxPageTableLevels, + mode_lib->vba.GPUVMEnable, + mode_lib->vba.BandwidthAvailableForImmediateFlip, + mode_lib->vba.TotImmediateFlipBytes, + mode_lib->vba.SourcePixelFormat[k], + ImmediateFlipBytes[k], + mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k], + mode_lib->vba.VRatio[k], + mode_lib->vba.Tno_bw[k], + mode_lib->vba.PDEAndMetaPTEBytesFrame[k], + mode_lib->vba.MetaRowByte[k], + mode_lib->vba.PixelPTEBytesPerRow[k], + mode_lib->vba.DCCEnable[k], + mode_lib->vba.dpte_row_height[k], + mode_lib->vba.meta_row_height[k], + mode_lib->vba.qual_row_bw[k], + &mode_lib->vba.DestinationLinesToRequestVMInImmediateFlip[k], + &mode_lib->vba.DestinationLinesToRequestRowInImmediateFlip[k], + &final_flip_bw[k], + &mode_lib->vba.ImmediateFlipSupportedForPipe[k]); + } + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + total_dcn_read_bw_with_flip = + total_dcn_read_bw_with_flip + + mode_lib->vba.cursor_bw[k] + + dml_max( + mode_lib->vba.prefetch_vm_bw[k], + dml_max( + mode_lib->vba.prefetch_row_bw[k], + final_flip_bw[k] + + dml_max( + mode_lib->vba.ReadBandwidthPlaneLuma[k] + + mode_lib->vba.ReadBandwidthPlaneChroma[k], + mode_lib->vba.RequiredPrefetchPixDataBWLuma[k]))); + } + mode_lib->vba.ImmediateFlipSupported = true; + if (total_dcn_read_bw_with_flip > mode_lib->vba.ReturnBW) { + mode_lib->vba.ImmediateFlipSupported = false; + } + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.ImmediateFlipSupportedForPipe[k] == false) { + mode_lib->vba.ImmediateFlipSupported = false; + } + } + } else { + mode_lib->vba.ImmediateFlipSupported = false; + } + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.ErrorResult[k]) { + mode_lib->vba.PrefetchModeSupported = false; + dml_print( + "DML: CalculatePrefetchSchedule ***failed***. Prefetch schedule violation. Results are NOT valid\n"); + } + } + + mode_lib->vba.VStartupLines = mode_lib->vba.VStartupLines + 1; + } while (!((mode_lib->vba.PrefetchModeSupported + && (!mode_lib->vba.ImmediateFlipSupport + || mode_lib->vba.ImmediateFlipSupported)) + || mode_lib->vba.MaximumMaxVStartupLines < mode_lib->vba.VStartupLines)); + + //Display Pipeline Delivery Time in Prefetch + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.VRatioPrefetchY[k] <= 1) { + mode_lib->vba.DisplayPipeLineDeliveryTimeLumaPrefetch[k] = + mode_lib->vba.SwathWidthY[k] * mode_lib->vba.DPPPerPlane[k] + / mode_lib->vba.HRatio[k] + / mode_lib->vba.PixelClock[k]; + } else { + mode_lib->vba.DisplayPipeLineDeliveryTimeLumaPrefetch[k] = + mode_lib->vba.SwathWidthY[k] + / mode_lib->vba.PSCL_THROUGHPUT_LUMA[k] + / mode_lib->vba.DPPCLK[k]; + } + if (mode_lib->vba.BytePerPixelDETC[k] == 0) { + mode_lib->vba.DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0; + } else { + if (mode_lib->vba.VRatioPrefetchC[k] <= 1) { + mode_lib->vba.DisplayPipeLineDeliveryTimeChromaPrefetch[k] = + mode_lib->vba.SwathWidthY[k] + * mode_lib->vba.DPPPerPlane[k] + / mode_lib->vba.HRatio[k] + / mode_lib->vba.PixelClock[k]; + } else { + mode_lib->vba.DisplayPipeLineDeliveryTimeChromaPrefetch[k] = + mode_lib->vba.SwathWidthY[k] + / mode_lib->vba.PSCL_THROUGHPUT_LUMA[k] + / mode_lib->vba.DPPCLK[k]; + } + } + } + + // Min TTUVBlank + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb] == 0) { + mode_lib->vba.AllowDRAMClockChangeDuringVBlank[k] = true; + mode_lib->vba.AllowDRAMSelfRefreshDuringVBlank[k] = true; + mode_lib->vba.MinTTUVBlank[k] = dml_max( + mode_lib->vba.DRAMClockChangeWatermark, + dml_max( + mode_lib->vba.StutterEnterPlusExitWatermark, + mode_lib->vba.UrgentWatermark)); + } else if (mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb] == 1) { + mode_lib->vba.AllowDRAMClockChangeDuringVBlank[k] = false; + mode_lib->vba.AllowDRAMSelfRefreshDuringVBlank[k] = true; + mode_lib->vba.MinTTUVBlank[k] = dml_max( + mode_lib->vba.StutterEnterPlusExitWatermark, + mode_lib->vba.UrgentWatermark); + } else { + mode_lib->vba.AllowDRAMClockChangeDuringVBlank[k] = false; + mode_lib->vba.AllowDRAMSelfRefreshDuringVBlank[k] = false; + mode_lib->vba.MinTTUVBlank[k] = mode_lib->vba.UrgentWatermark; + } + if (!mode_lib->vba.DynamicMetadataEnable[k]) + mode_lib->vba.MinTTUVBlank[k] = mode_lib->vba.TCalc + + mode_lib->vba.MinTTUVBlank[k]; + } + + // DCC Configuration + mode_lib->vba.ActiveDPPs = 0; + // NB P-State/DRAM Clock Change Support + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + mode_lib->vba.ActiveDPPs = mode_lib->vba.ActiveDPPs + mode_lib->vba.DPPPerPlane[k]; + } + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + double EffectiveLBLatencyHidingY; + double EffectiveLBLatencyHidingC; + double DPPOutputBufferLinesY; + double DPPOutputBufferLinesC; + double DPPOPPBufferingY; + double MaxDETBufferingTimeY; + double ActiveDRAMClockChangeLatencyMarginY; + + mode_lib->vba.LBLatencyHidingSourceLinesY = + dml_min( + mode_lib->vba.MaxLineBufferLines, + (unsigned int) dml_floor( + (double) mode_lib->vba.LineBufferSize + / mode_lib->vba.LBBitPerPixel[k] + / (mode_lib->vba.SwathWidthY[k] + / dml_max( + mode_lib->vba.HRatio[k], + 1.0)), + 1)) - (mode_lib->vba.vtaps[k] - 1); + + mode_lib->vba.LBLatencyHidingSourceLinesC = + dml_min( + mode_lib->vba.MaxLineBufferLines, + (unsigned int) dml_floor( + (double) mode_lib->vba.LineBufferSize + / mode_lib->vba.LBBitPerPixel[k] + / (mode_lib->vba.SwathWidthY[k] + / 2.0 + / dml_max( + mode_lib->vba.HRatio[k] + / 2, + 1.0)), + 1)) + - (mode_lib->vba.VTAPsChroma[k] - 1); + + EffectiveLBLatencyHidingY = mode_lib->vba.LBLatencyHidingSourceLinesY + / mode_lib->vba.VRatio[k] + * (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]); + + EffectiveLBLatencyHidingC = mode_lib->vba.LBLatencyHidingSourceLinesC + / (mode_lib->vba.VRatio[k] / 2) + * (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]); + + if (mode_lib->vba.SwathWidthY[k] > 2 * mode_lib->vba.DPPOutputBufferPixels) { + DPPOutputBufferLinesY = mode_lib->vba.DPPOutputBufferPixels + / mode_lib->vba.SwathWidthY[k]; + } else if (mode_lib->vba.SwathWidthY[k] > mode_lib->vba.DPPOutputBufferPixels) { + DPPOutputBufferLinesY = 0.5; + } else { + DPPOutputBufferLinesY = 1; + } + + if (mode_lib->vba.SwathWidthY[k] / 2 > 2 * mode_lib->vba.DPPOutputBufferPixels) { + DPPOutputBufferLinesC = mode_lib->vba.DPPOutputBufferPixels + / (mode_lib->vba.SwathWidthY[k] / 2); + } else if (mode_lib->vba.SwathWidthY[k] / 2 > mode_lib->vba.DPPOutputBufferPixels) { + DPPOutputBufferLinesC = 0.5; + } else { + DPPOutputBufferLinesC = 1; + } + + DPPOPPBufferingY = (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) + * (DPPOutputBufferLinesY + mode_lib->vba.OPPOutputBufferLines); + MaxDETBufferingTimeY = mode_lib->vba.FullDETBufferingTimeY[k] + + (mode_lib->vba.LinesInDETY[k] + - mode_lib->vba.LinesInDETYRoundedDownToSwath[k]) + / mode_lib->vba.SwathHeightY[k] + * (mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]); + + ActiveDRAMClockChangeLatencyMarginY = DPPOPPBufferingY + EffectiveLBLatencyHidingY + + MaxDETBufferingTimeY - mode_lib->vba.DRAMClockChangeWatermark; + + if (mode_lib->vba.ActiveDPPs > 1) { + ActiveDRAMClockChangeLatencyMarginY = + ActiveDRAMClockChangeLatencyMarginY + - (1 - 1 / (mode_lib->vba.ActiveDPPs - 1)) + * mode_lib->vba.SwathHeightY[k] + * (mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]); + } + + if (mode_lib->vba.BytePerPixelDETC[k] > 0) { + double DPPOPPBufferingC = (mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]) + * (DPPOutputBufferLinesC + + mode_lib->vba.OPPOutputBufferLines); + double MaxDETBufferingTimeC = + mode_lib->vba.FullDETBufferingTimeC[k] + + (mode_lib->vba.LinesInDETC[k] + - mode_lib->vba.LinesInDETCRoundedDownToSwath[k]) + / mode_lib->vba.SwathHeightC[k] + * (mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]); + double ActiveDRAMClockChangeLatencyMarginC = DPPOPPBufferingC + + EffectiveLBLatencyHidingC + MaxDETBufferingTimeC + - mode_lib->vba.DRAMClockChangeWatermark; + + if (mode_lib->vba.ActiveDPPs > 1) { + ActiveDRAMClockChangeLatencyMarginC = + ActiveDRAMClockChangeLatencyMarginC + - (1 + - 1 + / (mode_lib->vba.ActiveDPPs + - 1)) + * mode_lib->vba.SwathHeightC[k] + * (mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]); + } + mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = dml_min( + ActiveDRAMClockChangeLatencyMarginY, + ActiveDRAMClockChangeLatencyMarginC); + } else { + mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = + ActiveDRAMClockChangeLatencyMarginY; + } + + if (mode_lib->vba.WritebackEnable[k]) { + double WritebackDRAMClockChangeLatencyMargin; + + if (mode_lib->vba.WritebackPixelFormat[k] == dm_444_32) { + WritebackDRAMClockChangeLatencyMargin = + (double) (mode_lib->vba.WritebackInterfaceLumaBufferSize + + mode_lib->vba.WritebackInterfaceChromaBufferSize) + / (mode_lib->vba.WritebackDestinationWidth[k] + * mode_lib->vba.WritebackDestinationHeight[k] + / (mode_lib->vba.WritebackSourceHeight[k] + * mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]) + * 4) + - mode_lib->vba.WritebackDRAMClockChangeWatermark; + } else if (mode_lib->vba.WritebackPixelFormat[k] == dm_420_10) { + WritebackDRAMClockChangeLatencyMargin = + dml_min( + (double) mode_lib->vba.WritebackInterfaceLumaBufferSize + * 8.0 / 10, + 2.0 + * mode_lib->vba.WritebackInterfaceChromaBufferSize + * 8 / 10) + / (mode_lib->vba.WritebackDestinationWidth[k] + * mode_lib->vba.WritebackDestinationHeight[k] + / (mode_lib->vba.WritebackSourceHeight[k] + * mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k])) + - mode_lib->vba.WritebackDRAMClockChangeWatermark; + } else { + WritebackDRAMClockChangeLatencyMargin = + dml_min( + (double) mode_lib->vba.WritebackInterfaceLumaBufferSize, + 2.0 + * mode_lib->vba.WritebackInterfaceChromaBufferSize) + / (mode_lib->vba.WritebackDestinationWidth[k] + * mode_lib->vba.WritebackDestinationHeight[k] + / (mode_lib->vba.WritebackSourceHeight[k] + * mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k])) + - mode_lib->vba.WritebackDRAMClockChangeWatermark; + } + mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = dml_min( + mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k], + WritebackDRAMClockChangeLatencyMargin); + } + } + + { + float SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999; + int PlaneWithMinActiveDRAMClockChangeMargin = -1; + + mode_lib->vba.MinActiveDRAMClockChangeMargin = 999999; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] + < mode_lib->vba.MinActiveDRAMClockChangeMargin) { + mode_lib->vba.MinActiveDRAMClockChangeMargin = + mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k]; + if (mode_lib->vba.BlendingAndTiming[k] == k) { + PlaneWithMinActiveDRAMClockChangeMargin = k; + } else { + for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) { + if (mode_lib->vba.BlendingAndTiming[k] == j) { + PlaneWithMinActiveDRAMClockChangeMargin = j; + } + } + } + } + } + + mode_lib->vba.MinActiveDRAMClockChangeLatencySupported = + mode_lib->vba.MinActiveDRAMClockChangeMargin + + mode_lib->vba.DRAMClockChangeLatency; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (mode_lib->vba.BlendingAndTiming[k] == k)) + && !(mode_lib->vba.BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin) + && mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] + < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) { + SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = + mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k]; + } + } + + if (mode_lib->vba.DRAMClockChangeSupportsVActive && + mode_lib->vba.MinActiveDRAMClockChangeMargin > 60) { + mode_lib->vba.DRAMClockChangeWatermark += 25; + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb] == 0) { + if (mode_lib->vba.DRAMClockChangeWatermark > + dml_max(mode_lib->vba.StutterEnterPlusExitWatermark, mode_lib->vba.UrgentWatermark)) + mode_lib->vba.MinTTUVBlank[k] += 25; + } + } + + mode_lib->vba.DRAMClockChangeSupport[0][0] = dm_dram_clock_change_vactive; + } else if (mode_lib->vba.DummyPStateCheck && + mode_lib->vba.MinActiveDRAMClockChangeMargin > 0) { + mode_lib->vba.DRAMClockChangeSupport[0][0] = dm_dram_clock_change_vactive; + } else { + if ((mode_lib->vba.SynchronizedVBlank + || mode_lib->vba.NumberOfActivePlanes == 1 + || (SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0 && + mode_lib->vba.AllowDramClockChangeOneDisplayVactive)) + && mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb] == 0) { + mode_lib->vba.DRAMClockChangeSupport[0][0] = dm_dram_clock_change_vblank; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (!mode_lib->vba.AllowDRAMClockChangeDuringVBlank[k]) { + mode_lib->vba.DRAMClockChangeSupport[0][0] = + dm_dram_clock_change_unsupported; + } + } + } else { + mode_lib->vba.DRAMClockChangeSupport[0][0] = dm_dram_clock_change_unsupported; + } + } + } + for (k = 0; k <= mode_lib->vba.soc.num_states; k++) + for (j = 0; j < 2; j++) + mode_lib->vba.DRAMClockChangeSupport[k][j] = mode_lib->vba.DRAMClockChangeSupport[0][0]; + + //XFC Parameters: + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.XFCEnabled[k] == true) { + double TWait; + + mode_lib->vba.XFCSlaveVUpdateOffset[k] = mode_lib->vba.XFCTSlvVupdateOffset; + mode_lib->vba.XFCSlaveVupdateWidth[k] = mode_lib->vba.XFCTSlvVupdateWidth; + mode_lib->vba.XFCSlaveVReadyOffset[k] = mode_lib->vba.XFCTSlvVreadyOffset; + TWait = CalculateTWait( + mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb], + mode_lib->vba.DRAMClockChangeLatency, + mode_lib->vba.UrgentLatencyPixelDataOnly, + mode_lib->vba.SREnterPlusExitTime); + mode_lib->vba.XFCRemoteSurfaceFlipDelay = CalculateRemoteSurfaceFlipDelay( + mode_lib, + mode_lib->vba.VRatio[k], + mode_lib->vba.SwathWidthY[k], + dml_ceil(mode_lib->vba.BytePerPixelDETY[k], 1), + mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k], + mode_lib->vba.XFCTSlvVupdateOffset, + mode_lib->vba.XFCTSlvVupdateWidth, + mode_lib->vba.XFCTSlvVreadyOffset, + mode_lib->vba.XFCXBUFLatencyTolerance, + mode_lib->vba.XFCFillBWOverhead, + mode_lib->vba.XFCSlvChunkSize, + mode_lib->vba.XFCBusTransportTime, + mode_lib->vba.TCalc, + TWait, + &mode_lib->vba.SrcActiveDrainRate, + &mode_lib->vba.TInitXFill, + &mode_lib->vba.TslvChk); + mode_lib->vba.XFCRemoteSurfaceFlipLatency[k] = + dml_floor( + mode_lib->vba.XFCRemoteSurfaceFlipDelay + / (mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]), + 1); + mode_lib->vba.XFCTransferDelay[k] = + dml_ceil( + mode_lib->vba.XFCBusTransportTime + / (mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]), + 1); + mode_lib->vba.XFCPrechargeDelay[k] = + dml_ceil( + (mode_lib->vba.XFCBusTransportTime + + mode_lib->vba.TInitXFill + + mode_lib->vba.TslvChk) + / (mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]), + 1); + mode_lib->vba.InitFillLevel = mode_lib->vba.XFCXBUFLatencyTolerance + * mode_lib->vba.SrcActiveDrainRate; + mode_lib->vba.FinalFillMargin = + (mode_lib->vba.DestinationLinesToRequestVMInVBlank[k] + + mode_lib->vba.DestinationLinesToRequestRowInVBlank[k]) + * mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k] + * mode_lib->vba.SrcActiveDrainRate + + mode_lib->vba.XFCFillConstant; + mode_lib->vba.FinalFillLevel = mode_lib->vba.XFCRemoteSurfaceFlipDelay + * mode_lib->vba.SrcActiveDrainRate + + mode_lib->vba.FinalFillMargin; + mode_lib->vba.RemainingFillLevel = dml_max( + 0.0, + mode_lib->vba.FinalFillLevel - mode_lib->vba.InitFillLevel); + mode_lib->vba.TFinalxFill = mode_lib->vba.RemainingFillLevel + / (mode_lib->vba.SrcActiveDrainRate + * mode_lib->vba.XFCFillBWOverhead / 100); + mode_lib->vba.XFCPrefetchMargin[k] = + mode_lib->vba.XFCRemoteSurfaceFlipDelay + + mode_lib->vba.TFinalxFill + + (mode_lib->vba.DestinationLinesToRequestVMInVBlank[k] + + mode_lib->vba.DestinationLinesToRequestRowInVBlank[k]) + * mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]; + } else { + mode_lib->vba.XFCSlaveVUpdateOffset[k] = 0; + mode_lib->vba.XFCSlaveVupdateWidth[k] = 0; + mode_lib->vba.XFCSlaveVReadyOffset[k] = 0; + mode_lib->vba.XFCRemoteSurfaceFlipLatency[k] = 0; + mode_lib->vba.XFCPrechargeDelay[k] = 0; + mode_lib->vba.XFCTransferDelay[k] = 0; + mode_lib->vba.XFCPrefetchMargin[k] = 0; + } + } + { + unsigned int VStartupMargin = 0; + bool FirstMainPlane = true; + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.BlendingAndTiming[k] == k) { + unsigned int Margin = (mode_lib->vba.MaxVStartupLines[k] - mode_lib->vba.VStartup[k]) + * mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]; + + if (FirstMainPlane) { + VStartupMargin = Margin; + FirstMainPlane = false; + } else + VStartupMargin = dml_min(VStartupMargin, Margin); + } + + if (mode_lib->vba.UseMaximumVStartup) { + if (mode_lib->vba.VTotal_Max[k] == mode_lib->vba.VTotal[k]) { + //only use max vstart if it is not drr or lateflip. + mode_lib->vba.VStartup[k] = mode_lib->vba.MaxVStartupLines[mode_lib->vba.BlendingAndTiming[k]]; + } + } + } +} +} + +static void dml20v2_DisplayPipeConfiguration(struct display_mode_lib *mode_lib) +{ + double BytePerPixDETY; + double BytePerPixDETC; + double Read256BytesBlockHeightY; + double Read256BytesBlockHeightC; + double Read256BytesBlockWidthY; + double Read256BytesBlockWidthC; + double MaximumSwathHeightY; + double MaximumSwathHeightC; + double MinimumSwathHeightY; + double MinimumSwathHeightC; + double SwathWidth; + double SwathWidthGranularityY; + double SwathWidthGranularityC; + double RoundedUpMaxSwathSizeBytesY; + double RoundedUpMaxSwathSizeBytesC; + unsigned int j, k; + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + bool MainPlaneDoesODMCombine = false; + + if (mode_lib->vba.SourcePixelFormat[k] == dm_444_64) { + BytePerPixDETY = 8; + BytePerPixDETC = 0; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_32) { + BytePerPixDETY = 4; + BytePerPixDETC = 0; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_16) { + BytePerPixDETY = 2; + BytePerPixDETC = 0; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_8) { + BytePerPixDETY = 1; + BytePerPixDETC = 0; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8) { + BytePerPixDETY = 1; + BytePerPixDETC = 2; + } else { + BytePerPixDETY = 4.0 / 3.0; + BytePerPixDETC = 8.0 / 3.0; + } + + if ((mode_lib->vba.SourcePixelFormat[k] == dm_444_64 + || mode_lib->vba.SourcePixelFormat[k] == dm_444_32 + || mode_lib->vba.SourcePixelFormat[k] == dm_444_16 + || mode_lib->vba.SourcePixelFormat[k] == dm_444_8)) { + if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) { + Read256BytesBlockHeightY = 1; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_64) { + Read256BytesBlockHeightY = 4; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_32 + || mode_lib->vba.SourcePixelFormat[k] == dm_444_16) { + Read256BytesBlockHeightY = 8; + } else { + Read256BytesBlockHeightY = 16; + } + Read256BytesBlockWidthY = 256 / dml_ceil(BytePerPixDETY, 1) + / Read256BytesBlockHeightY; + Read256BytesBlockHeightC = 0; + Read256BytesBlockWidthC = 0; + } else { + if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) { + Read256BytesBlockHeightY = 1; + Read256BytesBlockHeightC = 1; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8) { + Read256BytesBlockHeightY = 16; + Read256BytesBlockHeightC = 8; + } else { + Read256BytesBlockHeightY = 8; + Read256BytesBlockHeightC = 8; + } + Read256BytesBlockWidthY = 256 / dml_ceil(BytePerPixDETY, 1) + / Read256BytesBlockHeightY; + Read256BytesBlockWidthC = 256 / dml_ceil(BytePerPixDETC, 2) + / Read256BytesBlockHeightC; + } + + if (mode_lib->vba.SourceScan[k] == dm_horz) { + MaximumSwathHeightY = Read256BytesBlockHeightY; + MaximumSwathHeightC = Read256BytesBlockHeightC; + } else { + MaximumSwathHeightY = Read256BytesBlockWidthY; + MaximumSwathHeightC = Read256BytesBlockWidthC; + } + + if ((mode_lib->vba.SourcePixelFormat[k] == dm_444_64 + || mode_lib->vba.SourcePixelFormat[k] == dm_444_32 + || mode_lib->vba.SourcePixelFormat[k] == dm_444_16 + || mode_lib->vba.SourcePixelFormat[k] == dm_444_8)) { + if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear + || (mode_lib->vba.SourcePixelFormat[k] == dm_444_64 + && (mode_lib->vba.SurfaceTiling[k] + == dm_sw_4kb_s + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_4kb_s_x + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_64kb_s + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_64kb_s_t + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_64kb_s_x + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_var_s + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_var_s_x) + && mode_lib->vba.SourceScan[k] == dm_horz)) { + MinimumSwathHeightY = MaximumSwathHeightY; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_8 + && mode_lib->vba.SourceScan[k] != dm_horz) { + MinimumSwathHeightY = MaximumSwathHeightY; + } else { + MinimumSwathHeightY = MaximumSwathHeightY / 2.0; + } + MinimumSwathHeightC = MaximumSwathHeightC; + } else { + if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) { + MinimumSwathHeightY = MaximumSwathHeightY; + MinimumSwathHeightC = MaximumSwathHeightC; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8 + && mode_lib->vba.SourceScan[k] == dm_horz) { + MinimumSwathHeightY = MaximumSwathHeightY / 2.0; + MinimumSwathHeightC = MaximumSwathHeightC; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10 + && mode_lib->vba.SourceScan[k] == dm_horz) { + MinimumSwathHeightC = MaximumSwathHeightC / 2.0; + MinimumSwathHeightY = MaximumSwathHeightY; + } else { + MinimumSwathHeightY = MaximumSwathHeightY; + MinimumSwathHeightC = MaximumSwathHeightC; + } + } + + if (mode_lib->vba.SourceScan[k] == dm_horz) { + SwathWidth = mode_lib->vba.ViewportWidth[k]; + } else { + SwathWidth = mode_lib->vba.ViewportHeight[k]; + } + + if (mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) { + MainPlaneDoesODMCombine = true; + } + for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) { + if (mode_lib->vba.BlendingAndTiming[k] == j + && mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) { + MainPlaneDoesODMCombine = true; + } + } + + if (MainPlaneDoesODMCombine == true) { + SwathWidth = dml_min( + SwathWidth, + mode_lib->vba.HActive[k] / 2.0 * mode_lib->vba.HRatio[k]); + } else { + if (mode_lib->vba.DPPPerPlane[k] == 0) + SwathWidth = 0; + else + SwathWidth = SwathWidth / mode_lib->vba.DPPPerPlane[k]; + } + + SwathWidthGranularityY = 256 / dml_ceil(BytePerPixDETY, 1) / MaximumSwathHeightY; + RoundedUpMaxSwathSizeBytesY = (dml_ceil( + (double) (SwathWidth - 1), + SwathWidthGranularityY) + SwathWidthGranularityY) * BytePerPixDETY + * MaximumSwathHeightY; + if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10) { + RoundedUpMaxSwathSizeBytesY = dml_ceil(RoundedUpMaxSwathSizeBytesY, 256) + + 256; + } + if (MaximumSwathHeightC > 0) { + SwathWidthGranularityC = 256.0 / dml_ceil(BytePerPixDETC, 2) + / MaximumSwathHeightC; + RoundedUpMaxSwathSizeBytesC = (dml_ceil( + (double) (SwathWidth / 2.0 - 1), + SwathWidthGranularityC) + SwathWidthGranularityC) + * BytePerPixDETC * MaximumSwathHeightC; + if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10) { + RoundedUpMaxSwathSizeBytesC = dml_ceil( + RoundedUpMaxSwathSizeBytesC, + 256) + 256; + } + } else + RoundedUpMaxSwathSizeBytesC = 0.0; + + if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC + <= mode_lib->vba.DETBufferSizeInKByte[0] * 1024.0 / 2.0) { + mode_lib->vba.SwathHeightY[k] = MaximumSwathHeightY; + mode_lib->vba.SwathHeightC[k] = MaximumSwathHeightC; + } else { + mode_lib->vba.SwathHeightY[k] = MinimumSwathHeightY; + mode_lib->vba.SwathHeightC[k] = MinimumSwathHeightC; + } + + if (mode_lib->vba.SwathHeightC[k] == 0) { + mode_lib->vba.DETBufferSizeY[k] = mode_lib->vba.DETBufferSizeInKByte[0] * 1024; + mode_lib->vba.DETBufferSizeC[k] = 0; + } else if (mode_lib->vba.SwathHeightY[k] <= mode_lib->vba.SwathHeightC[k]) { + mode_lib->vba.DETBufferSizeY[k] = mode_lib->vba.DETBufferSizeInKByte[0] + * 1024.0 / 2; + mode_lib->vba.DETBufferSizeC[k] = mode_lib->vba.DETBufferSizeInKByte[0] + * 1024.0 / 2; + } else { + mode_lib->vba.DETBufferSizeY[k] = mode_lib->vba.DETBufferSizeInKByte[0] + * 1024.0 * 2 / 3; + mode_lib->vba.DETBufferSizeC[k] = mode_lib->vba.DETBufferSizeInKByte[0] + * 1024.0 / 3; + } + } +} + +static double CalculateTWait( + unsigned int PrefetchMode, + double DRAMClockChangeLatency, + double UrgentLatencyPixelDataOnly, + double SREnterPlusExitTime) +{ + if (PrefetchMode == 0) { + return dml_max( + DRAMClockChangeLatency + UrgentLatencyPixelDataOnly, + dml_max(SREnterPlusExitTime, UrgentLatencyPixelDataOnly)); + } else if (PrefetchMode == 1) { + return dml_max(SREnterPlusExitTime, UrgentLatencyPixelDataOnly); + } else { + return UrgentLatencyPixelDataOnly; + } +} + +static double CalculateRemoteSurfaceFlipDelay( + struct display_mode_lib *mode_lib, + double VRatio, + double SwathWidth, + double Bpp, + double LineTime, + double XFCTSlvVupdateOffset, + double XFCTSlvVupdateWidth, + double XFCTSlvVreadyOffset, + double XFCXBUFLatencyTolerance, + double XFCFillBWOverhead, + double XFCSlvChunkSize, + double XFCBusTransportTime, + double TCalc, + double TWait, + double *SrcActiveDrainRate, + double *TInitXFill, + double *TslvChk) +{ + double TSlvSetup, AvgfillRate, result; + + *SrcActiveDrainRate = VRatio * SwathWidth * Bpp / LineTime; + TSlvSetup = XFCTSlvVupdateOffset + XFCTSlvVupdateWidth + XFCTSlvVreadyOffset; + *TInitXFill = XFCXBUFLatencyTolerance / (1 + XFCFillBWOverhead / 100); + AvgfillRate = *SrcActiveDrainRate * (1 + XFCFillBWOverhead / 100); + *TslvChk = XFCSlvChunkSize / AvgfillRate; + dml_print( + "DML::CalculateRemoteSurfaceFlipDelay: SrcActiveDrainRate: %f\n", + *SrcActiveDrainRate); + dml_print("DML::CalculateRemoteSurfaceFlipDelay: TSlvSetup: %f\n", TSlvSetup); + dml_print("DML::CalculateRemoteSurfaceFlipDelay: TInitXFill: %f\n", *TInitXFill); + dml_print("DML::CalculateRemoteSurfaceFlipDelay: AvgfillRate: %f\n", AvgfillRate); + dml_print("DML::CalculateRemoteSurfaceFlipDelay: TslvChk: %f\n", *TslvChk); + result = 2 * XFCBusTransportTime + TSlvSetup + TCalc + TWait + *TslvChk + *TInitXFill; // TODO: This doesn't seem to match programming guide + dml_print("DML::CalculateRemoteSurfaceFlipDelay: RemoteSurfaceFlipDelay: %f\n", result); + return result; +} + +static double CalculateWriteBackDelay( + enum source_format_class WritebackPixelFormat, + double WritebackHRatio, + double WritebackVRatio, + unsigned int WritebackLumaHTaps, + unsigned int WritebackLumaVTaps, + unsigned int WritebackChromaHTaps, + unsigned int WritebackChromaVTaps, + unsigned int WritebackDestinationWidth) +{ + double CalculateWriteBackDelay = + dml_max( + dml_ceil(WritebackLumaHTaps / 4.0, 1) / WritebackHRatio, + WritebackLumaVTaps * dml_ceil(1.0 / WritebackVRatio, 1) + * dml_ceil( + WritebackDestinationWidth + / 4.0, + 1) + + dml_ceil(1.0 / WritebackVRatio, 1) + * (dml_ceil( + WritebackLumaVTaps + / 4.0, + 1) + 4)); + + if (WritebackPixelFormat != dm_444_32) { + CalculateWriteBackDelay = + dml_max( + CalculateWriteBackDelay, + dml_max( + dml_ceil( + WritebackChromaHTaps + / 2.0, + 1) + / (2 + * WritebackHRatio), + WritebackChromaVTaps + * dml_ceil( + 1 + / (2 + * WritebackVRatio), + 1) + * dml_ceil( + WritebackDestinationWidth + / 2.0 + / 2.0, + 1) + + dml_ceil( + 1 + / (2 + * WritebackVRatio), + 1) + * (dml_ceil( + WritebackChromaVTaps + / 4.0, + 1) + + 4))); + } + return CalculateWriteBackDelay; +} + +static void CalculateActiveRowBandwidth( + bool GPUVMEnable, + enum source_format_class SourcePixelFormat, + double VRatio, + bool DCCEnable, + double LineTime, + unsigned int MetaRowByteLuma, + unsigned int MetaRowByteChroma, + unsigned int meta_row_height_luma, + unsigned int meta_row_height_chroma, + unsigned int PixelPTEBytesPerRowLuma, + unsigned int PixelPTEBytesPerRowChroma, + unsigned int dpte_row_height_luma, + unsigned int dpte_row_height_chroma, + double *meta_row_bw, + double *dpte_row_bw, + double *qual_row_bw) +{ + if (DCCEnable != true) { + *meta_row_bw = 0; + } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10) { + *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + + VRatio / 2 * MetaRowByteChroma + / (meta_row_height_chroma * LineTime); + } else { + *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime); + } + + if (GPUVMEnable != true) { + *dpte_row_bw = 0; + } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10) { + *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) + + VRatio / 2 * PixelPTEBytesPerRowChroma + / (dpte_row_height_chroma * LineTime); + } else { + *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime); + } + + if ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10)) { + *qual_row_bw = *meta_row_bw + *dpte_row_bw; + } else { + *qual_row_bw = 0; + } +} + +static void CalculateFlipSchedule( + struct display_mode_lib *mode_lib, + double UrgentExtraLatency, + double UrgentLatencyPixelDataOnly, + unsigned int GPUVMMaxPageTableLevels, + bool GPUVMEnable, + double BandwidthAvailableForImmediateFlip, + unsigned int TotImmediateFlipBytes, + enum source_format_class SourcePixelFormat, + unsigned int ImmediateFlipBytes, + double LineTime, + double VRatio, + double Tno_bw, + double PDEAndMetaPTEBytesFrame, + unsigned int MetaRowByte, + unsigned int PixelPTEBytesPerRow, + bool DCCEnable, + unsigned int dpte_row_height, + unsigned int meta_row_height, + double qual_row_bw, + double *DestinationLinesToRequestVMInImmediateFlip, + double *DestinationLinesToRequestRowInImmediateFlip, + double *final_flip_bw, + bool *ImmediateFlipSupportedForPipe) +{ + double min_row_time = 0.0; + + if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10) { + *DestinationLinesToRequestVMInImmediateFlip = 0.0; + *DestinationLinesToRequestRowInImmediateFlip = 0.0; + *final_flip_bw = qual_row_bw; + *ImmediateFlipSupportedForPipe = true; + } else { + double TimeForFetchingMetaPTEImmediateFlip; + double TimeForFetchingRowInVBlankImmediateFlip; + + if (GPUVMEnable == true) { + mode_lib->vba.ImmediateFlipBW[0] = BandwidthAvailableForImmediateFlip + * ImmediateFlipBytes / TotImmediateFlipBytes; + TimeForFetchingMetaPTEImmediateFlip = + dml_max( + Tno_bw + + PDEAndMetaPTEBytesFrame + / mode_lib->vba.ImmediateFlipBW[0], + dml_max( + UrgentExtraLatency + + UrgentLatencyPixelDataOnly + * (GPUVMMaxPageTableLevels + - 1), + LineTime / 4.0)); + } else { + TimeForFetchingMetaPTEImmediateFlip = 0; + } + + *DestinationLinesToRequestVMInImmediateFlip = dml_floor( + 4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime + 0.125), + 1) / 4.0; + + if ((GPUVMEnable == true || DCCEnable == true)) { + mode_lib->vba.ImmediateFlipBW[0] = BandwidthAvailableForImmediateFlip + * ImmediateFlipBytes / TotImmediateFlipBytes; + TimeForFetchingRowInVBlankImmediateFlip = dml_max( + (MetaRowByte + PixelPTEBytesPerRow) + / mode_lib->vba.ImmediateFlipBW[0], + dml_max(UrgentLatencyPixelDataOnly, LineTime / 4.0)); + } else { + TimeForFetchingRowInVBlankImmediateFlip = 0; + } + + *DestinationLinesToRequestRowInImmediateFlip = dml_floor( + 4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime + 0.125), + 1) / 4.0; + + if (GPUVMEnable == true) { + *final_flip_bw = + dml_max( + PDEAndMetaPTEBytesFrame + / (*DestinationLinesToRequestVMInImmediateFlip + * LineTime), + (MetaRowByte + PixelPTEBytesPerRow) + / (TimeForFetchingRowInVBlankImmediateFlip + * LineTime)); + } else if (MetaRowByte + PixelPTEBytesPerRow > 0) { + *final_flip_bw = (MetaRowByte + PixelPTEBytesPerRow) + / (TimeForFetchingRowInVBlankImmediateFlip * LineTime); + } else { + *final_flip_bw = 0; + } + + if (GPUVMEnable && !DCCEnable) + min_row_time = dpte_row_height * LineTime / VRatio; + else if (!GPUVMEnable && DCCEnable) + min_row_time = meta_row_height * LineTime / VRatio; + else + min_row_time = dml_min(dpte_row_height, meta_row_height) * LineTime + / VRatio; + + if (*DestinationLinesToRequestVMInImmediateFlip >= 8 + || *DestinationLinesToRequestRowInImmediateFlip >= 16 + || TimeForFetchingMetaPTEImmediateFlip + + 2 * TimeForFetchingRowInVBlankImmediateFlip + > min_row_time) + *ImmediateFlipSupportedForPipe = false; + else + *ImmediateFlipSupportedForPipe = true; + } +} + +static unsigned int TruncToValidBPP( + double DecimalBPP, + double DesiredBPP, + bool DSCEnabled, + enum output_encoder_class Output, + enum output_format_class Format, + unsigned int DSCInputBitPerComponent) +{ + if (Output == dm_hdmi) { + if (Format == dm_420) { + if (DecimalBPP >= 18 && (DesiredBPP == 0 || DesiredBPP == 18)) + return 18; + else if (DecimalBPP >= 15 && (DesiredBPP == 0 || DesiredBPP == 15)) + return 15; + else if (DecimalBPP >= 12 && (DesiredBPP == 0 || DesiredBPP == 12)) + return 12; + else + return BPP_INVALID; + } else if (Format == dm_444) { + if (DecimalBPP >= 36 && (DesiredBPP == 0 || DesiredBPP == 36)) + return 36; + else if (DecimalBPP >= 30 && (DesiredBPP == 0 || DesiredBPP == 30)) + return 30; + else if (DecimalBPP >= 24 && (DesiredBPP == 0 || DesiredBPP == 24)) + return 24; + else if (DecimalBPP >= 18 && (DesiredBPP == 0 || DesiredBPP == 18)) + return 18; + else + return BPP_INVALID; + } else { + if (DecimalBPP / 1.5 >= 24 && (DesiredBPP == 0 || DesiredBPP == 24)) + return 24; + else if (DecimalBPP / 1.5 >= 20 && (DesiredBPP == 0 || DesiredBPP == 20)) + return 20; + else if (DecimalBPP / 1.5 >= 16 && (DesiredBPP == 0 || DesiredBPP == 16)) + return 16; + else + return BPP_INVALID; + } + } else { + if (DSCEnabled) { + if (Format == dm_420) { + if (DesiredBPP == 0) { + if (DecimalBPP < 6) + return BPP_INVALID; + else if (DecimalBPP >= 1.5 * DSCInputBitPerComponent - 1.0 / 16.0) + return 1.5 * DSCInputBitPerComponent - 1.0 / 16.0; + else + return dml_floor(16 * DecimalBPP, 1) / 16.0; + } else { + if (DecimalBPP < 6 + || DesiredBPP < 6 + || DesiredBPP > 1.5 * DSCInputBitPerComponent - 1.0 / 16.0 + || DecimalBPP < DesiredBPP) { + return BPP_INVALID; + } else { + return DesiredBPP; + } + } + } else if (Format == dm_n422) { + if (DesiredBPP == 0) { + if (DecimalBPP < 7) + return BPP_INVALID; + else if (DecimalBPP >= 2 * DSCInputBitPerComponent - 1.0 / 16.0) + return 2 * DSCInputBitPerComponent - 1.0 / 16.0; + else + return dml_floor(16 * DecimalBPP, 1) / 16.0; + } else { + if (DecimalBPP < 7 + || DesiredBPP < 7 + || DesiredBPP > 2 * DSCInputBitPerComponent - 1.0 / 16.0 + || DecimalBPP < DesiredBPP) { + return BPP_INVALID; + } else { + return DesiredBPP; + } + } + } else { + if (DesiredBPP == 0) { + if (DecimalBPP < 8) + return BPP_INVALID; + else if (DecimalBPP >= 3 * DSCInputBitPerComponent - 1.0 / 16.0) + return 3 * DSCInputBitPerComponent - 1.0 / 16.0; + else + return dml_floor(16 * DecimalBPP, 1) / 16.0; + } else { + if (DecimalBPP < 8 + || DesiredBPP < 8 + || DesiredBPP > 3 * DSCInputBitPerComponent - 1.0 / 16.0 + || DecimalBPP < DesiredBPP) { + return BPP_INVALID; + } else { + return DesiredBPP; + } + } + } + } else if (Format == dm_420) { + if (DecimalBPP >= 18 && (DesiredBPP == 0 || DesiredBPP == 18)) + return 18; + else if (DecimalBPP >= 15 && (DesiredBPP == 0 || DesiredBPP == 15)) + return 15; + else if (DecimalBPP >= 12 && (DesiredBPP == 0 || DesiredBPP == 12)) + return 12; + else + return BPP_INVALID; + } else if (Format == dm_s422 || Format == dm_n422) { + if (DecimalBPP >= 24 && (DesiredBPP == 0 || DesiredBPP == 24)) + return 24; + else if (DecimalBPP >= 20 && (DesiredBPP == 0 || DesiredBPP == 20)) + return 20; + else if (DecimalBPP >= 16 && (DesiredBPP == 0 || DesiredBPP == 16)) + return 16; + else + return BPP_INVALID; + } else { + if (DecimalBPP >= 36 && (DesiredBPP == 0 || DesiredBPP == 36)) + return 36; + else if (DecimalBPP >= 30 && (DesiredBPP == 0 || DesiredBPP == 30)) + return 30; + else if (DecimalBPP >= 24 && (DesiredBPP == 0 || DesiredBPP == 24)) + return 24; + else if (DecimalBPP >= 18 && (DesiredBPP == 0 || DesiredBPP == 18)) + return 18; + else + return BPP_INVALID; + } + } +} + +void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib) +{ + struct vba_vars_st *locals = &mode_lib->vba; + + int i; + unsigned int j, k, m; + + /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/ + + /*Scale Ratio, taps Support Check*/ + + mode_lib->vba.ScaleRatioAndTapsSupport = true; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.ScalerEnabled[k] == false + && ((mode_lib->vba.SourcePixelFormat[k] != dm_444_64 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_32 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_16 + && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16 + && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8) + || mode_lib->vba.HRatio[k] != 1.0 + || mode_lib->vba.htaps[k] != 1.0 + || mode_lib->vba.VRatio[k] != 1.0 + || mode_lib->vba.vtaps[k] != 1.0)) { + mode_lib->vba.ScaleRatioAndTapsSupport = false; + } else if (mode_lib->vba.vtaps[k] < 1.0 || mode_lib->vba.vtaps[k] > 8.0 + || mode_lib->vba.htaps[k] < 1.0 || mode_lib->vba.htaps[k] > 8.0 + || (mode_lib->vba.htaps[k] > 1.0 + && (mode_lib->vba.htaps[k] % 2) == 1) + || mode_lib->vba.HRatio[k] > mode_lib->vba.MaxHSCLRatio + || mode_lib->vba.VRatio[k] > mode_lib->vba.MaxVSCLRatio + || mode_lib->vba.HRatio[k] > mode_lib->vba.htaps[k] + || mode_lib->vba.VRatio[k] > mode_lib->vba.vtaps[k] + || (mode_lib->vba.SourcePixelFormat[k] != dm_444_64 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_32 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_16 + && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16 + && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8 + && (mode_lib->vba.HRatio[k] / 2.0 + > mode_lib->vba.HTAPsChroma[k] + || mode_lib->vba.VRatio[k] / 2.0 + > mode_lib->vba.VTAPsChroma[k]))) { + mode_lib->vba.ScaleRatioAndTapsSupport = false; + } + } + /*Source Format, Pixel Format and Scan Support Check*/ + + mode_lib->vba.SourceFormatPixelAndScanSupport = true; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if ((mode_lib->vba.SurfaceTiling[k] == dm_sw_linear + && mode_lib->vba.SourceScan[k] != dm_horz) + || ((mode_lib->vba.SurfaceTiling[k] == dm_sw_4kb_d + || mode_lib->vba.SurfaceTiling[k] == dm_sw_4kb_d_x + || mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_d + || mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_d_t + || mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_d_x + || mode_lib->vba.SurfaceTiling[k] == dm_sw_var_d + || mode_lib->vba.SurfaceTiling[k] == dm_sw_var_d_x) + && mode_lib->vba.SourcePixelFormat[k] != dm_444_64) + || (mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_r_x + && (mode_lib->vba.SourcePixelFormat[k] == dm_mono_8 + || mode_lib->vba.SourcePixelFormat[k] + == dm_420_8 + || mode_lib->vba.SourcePixelFormat[k] + == dm_420_10)) + || (((mode_lib->vba.SurfaceTiling[k] == dm_sw_gfx7_2d_thin_gl + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_gfx7_2d_thin_l_vp) + && !((mode_lib->vba.SourcePixelFormat[k] + == dm_444_64 + || mode_lib->vba.SourcePixelFormat[k] + == dm_444_32) + && mode_lib->vba.SourceScan[k] + == dm_horz + && mode_lib->vba.SupportGFX7CompatibleTilingIn32bppAnd64bpp + == true + && mode_lib->vba.DCCEnable[k] + == false)) + || (mode_lib->vba.DCCEnable[k] == true + && (mode_lib->vba.SurfaceTiling[k] + == dm_sw_linear + || mode_lib->vba.SourcePixelFormat[k] + == dm_420_8 + || mode_lib->vba.SourcePixelFormat[k] + == dm_420_10)))) { + mode_lib->vba.SourceFormatPixelAndScanSupport = false; + } + } + /*Bandwidth Support Check*/ + + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.SourcePixelFormat[k] == dm_444_64) { + locals->BytePerPixelInDETY[k] = 8.0; + locals->BytePerPixelInDETC[k] = 0.0; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_32) { + locals->BytePerPixelInDETY[k] = 4.0; + locals->BytePerPixelInDETC[k] = 0.0; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_16 + || mode_lib->vba.SourcePixelFormat[k] == dm_mono_16) { + locals->BytePerPixelInDETY[k] = 2.0; + locals->BytePerPixelInDETC[k] = 0.0; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_mono_8) { + locals->BytePerPixelInDETY[k] = 1.0; + locals->BytePerPixelInDETC[k] = 0.0; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8) { + locals->BytePerPixelInDETY[k] = 1.0; + locals->BytePerPixelInDETC[k] = 2.0; + } else { + locals->BytePerPixelInDETY[k] = 4.0 / 3; + locals->BytePerPixelInDETC[k] = 8.0 / 3; + } + if (mode_lib->vba.SourceScan[k] == dm_horz) { + locals->SwathWidthYSingleDPP[k] = mode_lib->vba.ViewportWidth[k]; + } else { + locals->SwathWidthYSingleDPP[k] = mode_lib->vba.ViewportHeight[k]; + } + } + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + locals->ReadBandwidthLuma[k] = locals->SwathWidthYSingleDPP[k] * dml_ceil(locals->BytePerPixelInDETY[k], 1.0) + / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k]; + locals->ReadBandwidthChroma[k] = locals->SwathWidthYSingleDPP[k] / 2 * dml_ceil(locals->BytePerPixelInDETC[k], 2.0) + / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k] / 2.0; + locals->ReadBandwidth[k] = locals->ReadBandwidthLuma[k] + locals->ReadBandwidthChroma[k]; + } + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.WritebackEnable[k] == true + && mode_lib->vba.WritebackPixelFormat[k] == dm_444_32) { + locals->WriteBandwidth[k] = mode_lib->vba.WritebackDestinationWidth[k] + * mode_lib->vba.WritebackDestinationHeight[k] + / (mode_lib->vba.WritebackSourceHeight[k] + * mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]) * 4.0; + } else if (mode_lib->vba.WritebackEnable[k] == true + && mode_lib->vba.WritebackPixelFormat[k] == dm_420_10) { + locals->WriteBandwidth[k] = mode_lib->vba.WritebackDestinationWidth[k] + * mode_lib->vba.WritebackDestinationHeight[k] + / (mode_lib->vba.WritebackSourceHeight[k] + * mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]) * 3.0; + } else if (mode_lib->vba.WritebackEnable[k] == true) { + locals->WriteBandwidth[k] = mode_lib->vba.WritebackDestinationWidth[k] + * mode_lib->vba.WritebackDestinationHeight[k] + / (mode_lib->vba.WritebackSourceHeight[k] + * mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]) * 1.5; + } else { + locals->WriteBandwidth[k] = 0.0; + } + } + mode_lib->vba.DCCEnabledInAnyPlane = false; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.DCCEnable[k] == true) { + mode_lib->vba.DCCEnabledInAnyPlane = true; + } + } + mode_lib->vba.UrgentLatency = mode_lib->vba.UrgentLatencyPixelDataOnly; + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { + locals->FabricAndDRAMBandwidthPerState[i] = dml_min( + mode_lib->vba.DRAMSpeedPerState[i] * mode_lib->vba.NumberOfChannels + * mode_lib->vba.DRAMChannelWidth, + mode_lib->vba.FabricClockPerState[i] + * mode_lib->vba.FabricDatapathToDCNDataReturn) / 1000; + locals->ReturnBWToDCNPerState = dml_min(locals->ReturnBusWidth * locals->DCFCLKPerState[i], + locals->FabricAndDRAMBandwidthPerState[i] * 1000) + * locals->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly / 100; + + locals->ReturnBWPerState[i][0] = locals->ReturnBWToDCNPerState; + + if (locals->DCCEnabledInAnyPlane == true && locals->ReturnBWToDCNPerState > locals->DCFCLKPerState[i] * locals->ReturnBusWidth / 4) { + locals->ReturnBWPerState[i][0] = dml_min(locals->ReturnBWPerState[i][0], + locals->ReturnBWToDCNPerState * 4 * (1 - locals->UrgentLatency / + ((locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024 + / (locals->ReturnBWToDCNPerState - locals->DCFCLKPerState[i] + * locals->ReturnBusWidth / 4) + locals->UrgentLatency))); + } + locals->CriticalPoint = 2 * locals->ReturnBusWidth * locals->DCFCLKPerState[i] * + locals->UrgentLatency / (locals->ReturnBWToDCNPerState * locals->UrgentLatency + + (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024); + + if (locals->DCCEnabledInAnyPlane && locals->CriticalPoint > 1 && locals->CriticalPoint < 4) { + locals->ReturnBWPerState[i][0] = dml_min(locals->ReturnBWPerState[i][0], + 4 * locals->ReturnBWToDCNPerState * + (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024 + * locals->ReturnBusWidth * locals->DCFCLKPerState[i] * locals->UrgentLatency / + dml_pow((locals->ReturnBWToDCNPerState * locals->UrgentLatency + + (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024), 2)); + } + + locals->ReturnBWToDCNPerState = dml_min(locals->ReturnBusWidth * + locals->DCFCLKPerState[i], locals->FabricAndDRAMBandwidthPerState[i] * 1000); + + if (locals->DCCEnabledInAnyPlane == true && locals->ReturnBWToDCNPerState > locals->DCFCLKPerState[i] * locals->ReturnBusWidth / 4) { + locals->ReturnBWPerState[i][0] = dml_min(locals->ReturnBWPerState[i][0], + locals->ReturnBWToDCNPerState * 4 * (1 - locals->UrgentLatency / + ((locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024 + / (locals->ReturnBWToDCNPerState - locals->DCFCLKPerState[i] + * locals->ReturnBusWidth / 4) + locals->UrgentLatency))); + } + locals->CriticalPoint = 2 * locals->ReturnBusWidth * locals->DCFCLKPerState[i] * + locals->UrgentLatency / (locals->ReturnBWToDCNPerState * locals->UrgentLatency + + (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024); + + if (locals->DCCEnabledInAnyPlane && locals->CriticalPoint > 1 && locals->CriticalPoint < 4) { + locals->ReturnBWPerState[i][0] = dml_min(locals->ReturnBWPerState[i][0], + 4 * locals->ReturnBWToDCNPerState * + (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024 + * locals->ReturnBusWidth * locals->DCFCLKPerState[i] * locals->UrgentLatency / + dml_pow((locals->ReturnBWToDCNPerState * locals->UrgentLatency + + (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024), 2)); + } + } + /*Writeback Latency support check*/ + + mode_lib->vba.WritebackLatencySupport = true; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.WritebackEnable[k] == true) { + if (mode_lib->vba.WritebackPixelFormat[k] == dm_444_32) { + if (locals->WriteBandwidth[k] + > (mode_lib->vba.WritebackInterfaceLumaBufferSize + + mode_lib->vba.WritebackInterfaceChromaBufferSize) + / mode_lib->vba.WritebackLatency) { + mode_lib->vba.WritebackLatencySupport = false; + } + } else { + if (locals->WriteBandwidth[k] + > 1.5 + * dml_min( + mode_lib->vba.WritebackInterfaceLumaBufferSize, + 2.0 + * mode_lib->vba.WritebackInterfaceChromaBufferSize) + / mode_lib->vba.WritebackLatency) { + mode_lib->vba.WritebackLatencySupport = false; + } + } + } + } + /*Re-ordering Buffer Support Check*/ + + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { + locals->UrgentRoundTripAndOutOfOrderLatencyPerState[i] = + (mode_lib->vba.RoundTripPingLatencyCycles + 32.0) / mode_lib->vba.DCFCLKPerState[i] + + locals->UrgentOutOfOrderReturnPerChannel * mode_lib->vba.NumberOfChannels / locals->ReturnBWPerState[i][0]; + if ((mode_lib->vba.ROBBufferSizeInKByte - mode_lib->vba.PixelChunkSizeInKByte) * 1024.0 / locals->ReturnBWPerState[i][0] + > locals->UrgentRoundTripAndOutOfOrderLatencyPerState[i]) { + locals->ROBSupport[i][0] = true; + } else { + locals->ROBSupport[i][0] = false; + } + } + /*Writeback Mode Support Check*/ + + mode_lib->vba.TotalNumberOfActiveWriteback = 0; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.WritebackEnable[k] == true) { + if (mode_lib->vba.ActiveWritebacksPerPlane[k] == 0) + mode_lib->vba.ActiveWritebacksPerPlane[k] = 1; + mode_lib->vba.TotalNumberOfActiveWriteback = + mode_lib->vba.TotalNumberOfActiveWriteback + + mode_lib->vba.ActiveWritebacksPerPlane[k]; + } + } + mode_lib->vba.WritebackModeSupport = true; + if (mode_lib->vba.TotalNumberOfActiveWriteback > mode_lib->vba.MaxNumWriteback) { + mode_lib->vba.WritebackModeSupport = false; + } + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.WritebackEnable[k] == true + && mode_lib->vba.Writeback10bpc420Supported != true + && mode_lib->vba.WritebackPixelFormat[k] == dm_420_10) { + mode_lib->vba.WritebackModeSupport = false; + } + } + /*Writeback Scale Ratio and Taps Support Check*/ + + mode_lib->vba.WritebackScaleRatioAndTapsSupport = true; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.WritebackEnable[k] == true) { + if (mode_lib->vba.WritebackLumaAndChromaScalingSupported == false + && (mode_lib->vba.WritebackHRatio[k] != 1.0 + || mode_lib->vba.WritebackVRatio[k] != 1.0)) { + mode_lib->vba.WritebackScaleRatioAndTapsSupport = false; + } + if (mode_lib->vba.WritebackHRatio[k] > mode_lib->vba.WritebackMaxHSCLRatio + || mode_lib->vba.WritebackVRatio[k] + > mode_lib->vba.WritebackMaxVSCLRatio + || mode_lib->vba.WritebackHRatio[k] + < mode_lib->vba.WritebackMinHSCLRatio + || mode_lib->vba.WritebackVRatio[k] + < mode_lib->vba.WritebackMinVSCLRatio + || mode_lib->vba.WritebackLumaHTaps[k] + > mode_lib->vba.WritebackMaxHSCLTaps + || mode_lib->vba.WritebackLumaVTaps[k] + > mode_lib->vba.WritebackMaxVSCLTaps + || mode_lib->vba.WritebackHRatio[k] + > mode_lib->vba.WritebackLumaHTaps[k] + || mode_lib->vba.WritebackVRatio[k] + > mode_lib->vba.WritebackLumaVTaps[k] + || (mode_lib->vba.WritebackLumaHTaps[k] > 2.0 + && ((mode_lib->vba.WritebackLumaHTaps[k] % 2) + == 1)) + || (mode_lib->vba.WritebackPixelFormat[k] != dm_444_32 + && (mode_lib->vba.WritebackChromaHTaps[k] + > mode_lib->vba.WritebackMaxHSCLTaps + || mode_lib->vba.WritebackChromaVTaps[k] + > mode_lib->vba.WritebackMaxVSCLTaps + || 2.0 + * mode_lib->vba.WritebackHRatio[k] + > mode_lib->vba.WritebackChromaHTaps[k] + || 2.0 + * mode_lib->vba.WritebackVRatio[k] + > mode_lib->vba.WritebackChromaVTaps[k] + || (mode_lib->vba.WritebackChromaHTaps[k] > 2.0 + && ((mode_lib->vba.WritebackChromaHTaps[k] % 2) == 1))))) { + mode_lib->vba.WritebackScaleRatioAndTapsSupport = false; + } + if (mode_lib->vba.WritebackVRatio[k] < 1.0) { + mode_lib->vba.WritebackLumaVExtra = + dml_max(1.0 - 2.0 / dml_ceil(1.0 / mode_lib->vba.WritebackVRatio[k], 1.0), 0.0); + } else { + mode_lib->vba.WritebackLumaVExtra = -1; + } + if ((mode_lib->vba.WritebackPixelFormat[k] == dm_444_32 + && mode_lib->vba.WritebackLumaVTaps[k] + > (mode_lib->vba.WritebackLineBufferLumaBufferSize + + mode_lib->vba.WritebackLineBufferChromaBufferSize) + / 3.0 + / mode_lib->vba.WritebackDestinationWidth[k] + - mode_lib->vba.WritebackLumaVExtra) + || (mode_lib->vba.WritebackPixelFormat[k] == dm_420_8 + && mode_lib->vba.WritebackLumaVTaps[k] + > mode_lib->vba.WritebackLineBufferLumaBufferSize + * 8.0 / 10.0 / mode_lib->vba.WritebackDestinationWidth[k] + - mode_lib->vba.WritebackLumaVExtra) + || (mode_lib->vba.WritebackPixelFormat[k] == dm_420_10 + && mode_lib->vba.WritebackLumaVTaps[k] + > mode_lib->vba.WritebackLineBufferLumaBufferSize + * 8.0 / 10.0 + / mode_lib->vba.WritebackDestinationWidth[k] + - mode_lib->vba.WritebackLumaVExtra)) { + mode_lib->vba.WritebackScaleRatioAndTapsSupport = false; + } + if (2.0 * mode_lib->vba.WritebackVRatio[k] < 1) { + mode_lib->vba.WritebackChromaVExtra = 0.0; + } else { + mode_lib->vba.WritebackChromaVExtra = -1; + } + if ((mode_lib->vba.WritebackPixelFormat[k] == dm_420_8 + && mode_lib->vba.WritebackChromaVTaps[k] + > mode_lib->vba.WritebackLineBufferChromaBufferSize + * 8.0 / 10.0 / mode_lib->vba.WritebackDestinationWidth[k] + - mode_lib->vba.WritebackChromaVExtra) + || (mode_lib->vba.WritebackPixelFormat[k] == dm_420_10 + && mode_lib->vba.WritebackChromaVTaps[k] + > mode_lib->vba.WritebackLineBufferChromaBufferSize + * 8.0 / 10.0 + / mode_lib->vba.WritebackDestinationWidth[k] + - mode_lib->vba.WritebackChromaVExtra)) { + mode_lib->vba.WritebackScaleRatioAndTapsSupport = false; + } + } + } + /*Maximum DISPCLK/DPPCLK Support check*/ + + mode_lib->vba.WritebackRequiredDISPCLK = 0.0; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.WritebackEnable[k] == true) { + mode_lib->vba.WritebackRequiredDISPCLK = + dml_max( + mode_lib->vba.WritebackRequiredDISPCLK, + CalculateWriteBackDISPCLK( + mode_lib->vba.WritebackPixelFormat[k], + mode_lib->vba.PixelClock[k], + mode_lib->vba.WritebackHRatio[k], + mode_lib->vba.WritebackVRatio[k], + mode_lib->vba.WritebackLumaHTaps[k], + mode_lib->vba.WritebackLumaVTaps[k], + mode_lib->vba.WritebackChromaHTaps[k], + mode_lib->vba.WritebackChromaVTaps[k], + mode_lib->vba.WritebackDestinationWidth[k], + mode_lib->vba.HTotal[k], + mode_lib->vba.WritebackChromaLineBufferWidth)); + } + } + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.HRatio[k] > 1.0) { + locals->PSCL_FACTOR[k] = dml_min( + mode_lib->vba.MaxDCHUBToPSCLThroughput, + mode_lib->vba.MaxPSCLToLBThroughput + * mode_lib->vba.HRatio[k] + / dml_ceil( + mode_lib->vba.htaps[k] + / 6.0, + 1.0)); + } else { + locals->PSCL_FACTOR[k] = dml_min( + mode_lib->vba.MaxDCHUBToPSCLThroughput, + mode_lib->vba.MaxPSCLToLBThroughput); + } + if (locals->BytePerPixelInDETC[k] == 0.0) { + locals->PSCL_FACTOR_CHROMA[k] = 0.0; + locals->MinDPPCLKUsingSingleDPP[k] = + mode_lib->vba.PixelClock[k] + * dml_max3( + mode_lib->vba.vtaps[k] / 6.0 + * dml_min( + 1.0, + mode_lib->vba.HRatio[k]), + mode_lib->vba.HRatio[k] + * mode_lib->vba.VRatio[k] + / locals->PSCL_FACTOR[k], + 1.0); + if ((mode_lib->vba.htaps[k] > 6.0 || mode_lib->vba.vtaps[k] > 6.0) + && locals->MinDPPCLKUsingSingleDPP[k] + < 2.0 * mode_lib->vba.PixelClock[k]) { + locals->MinDPPCLKUsingSingleDPP[k] = 2.0 + * mode_lib->vba.PixelClock[k]; + } + } else { + if (mode_lib->vba.HRatio[k] / 2.0 > 1.0) { + locals->PSCL_FACTOR_CHROMA[k] = + dml_min( + mode_lib->vba.MaxDCHUBToPSCLThroughput, + mode_lib->vba.MaxPSCLToLBThroughput + * mode_lib->vba.HRatio[k] + / 2.0 + / dml_ceil( + mode_lib->vba.HTAPsChroma[k] + / 6.0, + 1.0)); + } else { + locals->PSCL_FACTOR_CHROMA[k] = dml_min( + mode_lib->vba.MaxDCHUBToPSCLThroughput, + mode_lib->vba.MaxPSCLToLBThroughput); + } + locals->MinDPPCLKUsingSingleDPP[k] = + mode_lib->vba.PixelClock[k] + * dml_max5( + mode_lib->vba.vtaps[k] / 6.0 + * dml_min( + 1.0, + mode_lib->vba.HRatio[k]), + mode_lib->vba.HRatio[k] + * mode_lib->vba.VRatio[k] + / locals->PSCL_FACTOR[k], + mode_lib->vba.VTAPsChroma[k] + / 6.0 + * dml_min( + 1.0, + mode_lib->vba.HRatio[k] + / 2.0), + mode_lib->vba.HRatio[k] + * mode_lib->vba.VRatio[k] + / 4.0 + / locals->PSCL_FACTOR_CHROMA[k], + 1.0); + if ((mode_lib->vba.htaps[k] > 6.0 || mode_lib->vba.vtaps[k] > 6.0 + || mode_lib->vba.HTAPsChroma[k] > 6.0 + || mode_lib->vba.VTAPsChroma[k] > 6.0) + && locals->MinDPPCLKUsingSingleDPP[k] + < 2.0 * mode_lib->vba.PixelClock[k]) { + locals->MinDPPCLKUsingSingleDPP[k] = 2.0 + * mode_lib->vba.PixelClock[k]; + } + } + } + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + Calculate256BBlockSizes( + mode_lib->vba.SourcePixelFormat[k], + mode_lib->vba.SurfaceTiling[k], + dml_ceil(locals->BytePerPixelInDETY[k], 1.0), + dml_ceil(locals->BytePerPixelInDETC[k], 2.0), + &locals->Read256BlockHeightY[k], + &locals->Read256BlockHeightC[k], + &locals->Read256BlockWidthY[k], + &locals->Read256BlockWidthC[k]); + if (mode_lib->vba.SourceScan[k] == dm_horz) { + locals->MaxSwathHeightY[k] = locals->Read256BlockHeightY[k]; + locals->MaxSwathHeightC[k] = locals->Read256BlockHeightC[k]; + } else { + locals->MaxSwathHeightY[k] = locals->Read256BlockWidthY[k]; + locals->MaxSwathHeightC[k] = locals->Read256BlockWidthC[k]; + } + if ((mode_lib->vba.SourcePixelFormat[k] == dm_444_64 + || mode_lib->vba.SourcePixelFormat[k] == dm_444_32 + || mode_lib->vba.SourcePixelFormat[k] == dm_444_16 + || mode_lib->vba.SourcePixelFormat[k] == dm_mono_16 + || mode_lib->vba.SourcePixelFormat[k] == dm_mono_8)) { + if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear + || (mode_lib->vba.SourcePixelFormat[k] == dm_444_64 + && (mode_lib->vba.SurfaceTiling[k] + == dm_sw_4kb_s + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_4kb_s_x + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_64kb_s + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_64kb_s_t + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_64kb_s_x + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_var_s + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_var_s_x) + && mode_lib->vba.SourceScan[k] == dm_horz)) { + locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k]; + } else { + locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k] + / 2.0; + } + locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k]; + } else { + if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) { + locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k]; + locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k]; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8 + && mode_lib->vba.SourceScan[k] == dm_horz) { + locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k] + / 2.0; + locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k]; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10 + && mode_lib->vba.SourceScan[k] == dm_horz) { + locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k] + / 2.0; + locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k]; + } else { + locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k]; + locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k]; + } + } + if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) { + mode_lib->vba.MaximumSwathWidthSupport = 8192.0; + } else { + mode_lib->vba.MaximumSwathWidthSupport = 5120.0; + } + mode_lib->vba.MaximumSwathWidthInDETBuffer = + dml_min( + mode_lib->vba.MaximumSwathWidthSupport, + mode_lib->vba.DETBufferSizeInKByte[0] * 1024.0 / 2.0 + / (locals->BytePerPixelInDETY[k] + * locals->MinSwathHeightY[k] + + locals->BytePerPixelInDETC[k] + / 2.0 + * locals->MinSwathHeightC[k])); + if (locals->BytePerPixelInDETC[k] == 0.0) { + mode_lib->vba.MaximumSwathWidthInLineBuffer = + mode_lib->vba.LineBufferSize + * dml_max(mode_lib->vba.HRatio[k], 1.0) + / mode_lib->vba.LBBitPerPixel[k] + / (mode_lib->vba.vtaps[k] + + dml_max( + dml_ceil( + mode_lib->vba.VRatio[k], + 1.0) + - 2, + 0.0)); + } else { + mode_lib->vba.MaximumSwathWidthInLineBuffer = + dml_min( + mode_lib->vba.LineBufferSize + * dml_max( + mode_lib->vba.HRatio[k], + 1.0) + / mode_lib->vba.LBBitPerPixel[k] + / (mode_lib->vba.vtaps[k] + + dml_max( + dml_ceil( + mode_lib->vba.VRatio[k], + 1.0) + - 2, + 0.0)), + 2.0 * mode_lib->vba.LineBufferSize + * dml_max( + mode_lib->vba.HRatio[k] + / 2.0, + 1.0) + / mode_lib->vba.LBBitPerPixel[k] + / (mode_lib->vba.VTAPsChroma[k] + + dml_max( + dml_ceil( + mode_lib->vba.VRatio[k] + / 2.0, + 1.0) + - 2, + 0.0))); + } + locals->MaximumSwathWidth[k] = dml_min( + mode_lib->vba.MaximumSwathWidthInDETBuffer, + mode_lib->vba.MaximumSwathWidthInLineBuffer); + } + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { + double MaxMaxDispclkRoundedDown = RoundToDFSGranularityDown( + mode_lib->vba.MaxDispclk[mode_lib->vba.soc.num_states], + mode_lib->vba.DISPCLKDPPCLKVCOSpeed); + + for (j = 0; j < 2; j++) { + mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown( + mode_lib->vba.MaxDispclk[i], + mode_lib->vba.DISPCLKDPPCLKVCOSpeed); + mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown( + mode_lib->vba.MaxDppclk[i], + mode_lib->vba.DISPCLKDPPCLKVCOSpeed); + locals->RequiredDISPCLK[i][j] = 0.0; + locals->DISPCLK_DPPCLK_Support[i][j] = true; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine = + mode_lib->vba.PixelClock[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) + * (1.0 + mode_lib->vba.DISPCLKRampingMargin / 100.0); + if (mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine >= mode_lib->vba.MaxDispclk[i] + && i == mode_lib->vba.soc.num_states) + mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine = mode_lib->vba.PixelClock[k] + * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); + + mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine = mode_lib->vba.PixelClock[k] / 2 + * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * (1 + mode_lib->vba.DISPCLKRampingMargin / 100.0); + if (mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine >= mode_lib->vba.MaxDispclk[i] + && i == mode_lib->vba.soc.num_states) + mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine = mode_lib->vba.PixelClock[k] / 2 + * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); + + locals->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled; + mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine; + if (mode_lib->vba.ODMCapability) { + if (locals->PlaneRequiredDISPCLKWithoutODMCombine > MaxMaxDispclkRoundedDown) { + locals->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; + mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine; + } else if (locals->DSCEnabled[k] && (locals->HActive[k] > DCN20_MAX_DSC_IMAGE_WIDTH)) { + locals->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; + mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine; + } else if (locals->HActive[k] > DCN20_MAX_420_IMAGE_WIDTH && locals->OutputFormat[k] == dm_420) { + locals->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; + mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine; + } + } + + if (locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) <= mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity + && locals->SwathWidthYSingleDPP[k] <= locals->MaximumSwathWidth[k] + && locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) { + locals->NoOfDPP[i][j][k] = 1; + locals->RequiredDPPCLK[i][j][k] = + locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); + } else { + locals->NoOfDPP[i][j][k] = 2; + locals->RequiredDPPCLK[i][j][k] = + locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0; + } + locals->RequiredDISPCLK[i][j] = dml_max( + locals->RequiredDISPCLK[i][j], + mode_lib->vba.PlaneRequiredDISPCLK); + if ((locals->MinDPPCLKUsingSingleDPP[k] / locals->NoOfDPP[i][j][k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) + > mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity) + || (mode_lib->vba.PlaneRequiredDISPCLK > mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity)) { + locals->DISPCLK_DPPCLK_Support[i][j] = false; + } + } + locals->TotalNumberOfActiveDPP[i][j] = 0.0; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) + locals->TotalNumberOfActiveDPP[i][j] = locals->TotalNumberOfActiveDPP[i][j] + locals->NoOfDPP[i][j][k]; + if (j == 1) { + while (locals->TotalNumberOfActiveDPP[i][j] < mode_lib->vba.MaxNumDPP + && locals->TotalNumberOfActiveDPP[i][j] < 2 * mode_lib->vba.NumberOfActivePlanes) { + double BWOfNonSplitPlaneOfMaximumBandwidth; + unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth; + + BWOfNonSplitPlaneOfMaximumBandwidth = 0; + NumberOfNonSplitPlaneOfMaximumBandwidth = 0; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (locals->ReadBandwidth[k] > BWOfNonSplitPlaneOfMaximumBandwidth && locals->NoOfDPP[i][j][k] == 1) { + BWOfNonSplitPlaneOfMaximumBandwidth = locals->ReadBandwidth[k]; + NumberOfNonSplitPlaneOfMaximumBandwidth = k; + } + } + locals->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2; + locals->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = + locals->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth] + * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2; + locals->TotalNumberOfActiveDPP[i][j] = locals->TotalNumberOfActiveDPP[i][j] + 1; + } + } + if (locals->TotalNumberOfActiveDPP[i][j] > mode_lib->vba.MaxNumDPP) { + locals->RequiredDISPCLK[i][j] = 0.0; + locals->DISPCLK_DPPCLK_Support[i][j] = true; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + locals->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled; + if (locals->SwathWidthYSingleDPP[k] <= locals->MaximumSwathWidth[k]) { + locals->NoOfDPP[i][j][k] = 1; + locals->RequiredDPPCLK[i][j][k] = locals->MinDPPCLKUsingSingleDPP[k] + * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); + } else { + locals->NoOfDPP[i][j][k] = 2; + locals->RequiredDPPCLK[i][j][k] = locals->MinDPPCLKUsingSingleDPP[k] + * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0; + } + if (i != mode_lib->vba.soc.num_states) { + mode_lib->vba.PlaneRequiredDISPCLK = + mode_lib->vba.PixelClock[k] + * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) + * (1.0 + mode_lib->vba.DISPCLKRampingMargin / 100.0); + } else { + mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PixelClock[k] + * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); + } + locals->RequiredDISPCLK[i][j] = dml_max( + locals->RequiredDISPCLK[i][j], + mode_lib->vba.PlaneRequiredDISPCLK); + if (locals->MinDPPCLKUsingSingleDPP[k] / locals->NoOfDPP[i][j][k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) + > mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity + || mode_lib->vba.PlaneRequiredDISPCLK > mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity) + locals->DISPCLK_DPPCLK_Support[i][j] = false; + } + locals->TotalNumberOfActiveDPP[i][j] = 0.0; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) + locals->TotalNumberOfActiveDPP[i][j] = locals->TotalNumberOfActiveDPP[i][j] + locals->NoOfDPP[i][j][k]; + } + locals->RequiredDISPCLK[i][j] = dml_max( + locals->RequiredDISPCLK[i][j], + mode_lib->vba.WritebackRequiredDISPCLK); + if (mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity + < mode_lib->vba.WritebackRequiredDISPCLK) { + locals->DISPCLK_DPPCLK_Support[i][j] = false; + } + } + } + /*Viewport Size Check*/ + + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { + locals->ViewportSizeSupport[i][0] = true; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { + if (dml_min(locals->SwathWidthYSingleDPP[k], dml_round(mode_lib->vba.HActive[k] / 2.0 * mode_lib->vba.HRatio[k])) + > locals->MaximumSwathWidth[k]) { + locals->ViewportSizeSupport[i][0] = false; + } + } else { + if (locals->SwathWidthYSingleDPP[k] / 2.0 > locals->MaximumSwathWidth[k]) { + locals->ViewportSizeSupport[i][0] = false; + } + } + } + } + /*Total Available Pipes Support Check*/ + + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { + for (j = 0; j < 2; j++) { + if (locals->TotalNumberOfActiveDPP[i][j] <= mode_lib->vba.MaxNumDPP) + locals->TotalAvailablePipesSupport[i][j] = true; + else + locals->TotalAvailablePipesSupport[i][j] = false; + } + } + /*Total Available OTG Support Check*/ + + mode_lib->vba.TotalNumberOfActiveOTG = 0.0; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.BlendingAndTiming[k] == k) { + mode_lib->vba.TotalNumberOfActiveOTG = mode_lib->vba.TotalNumberOfActiveOTG + + 1.0; + } + } + if (mode_lib->vba.TotalNumberOfActiveOTG <= mode_lib->vba.MaxNumOTG) { + mode_lib->vba.NumberOfOTGSupport = true; + } else { + mode_lib->vba.NumberOfOTGSupport = false; + } + /*Display IO and DSC Support Check*/ + + mode_lib->vba.NonsupportedDSCInputBPC = false; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (!(mode_lib->vba.DSCInputBitPerComponent[k] == 12.0 + || mode_lib->vba.DSCInputBitPerComponent[k] == 10.0 + || mode_lib->vba.DSCInputBitPerComponent[k] == 8.0)) { + mode_lib->vba.NonsupportedDSCInputBPC = true; + } + } + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + locals->RequiresDSC[i][k] = 0; + locals->RequiresFEC[i][k] = 0; + if (mode_lib->vba.BlendingAndTiming[k] == k) { + if (mode_lib->vba.Output[k] == dm_hdmi) { + locals->RequiresDSC[i][k] = 0; + locals->RequiresFEC[i][k] = 0; + locals->OutputBppPerState[i][k] = TruncToValidBPP( + dml_min(600.0, mode_lib->vba.PHYCLKPerState[i]) / mode_lib->vba.PixelClockBackEnd[k] * 24, + mode_lib->vba.ForcedOutputLinkBPP[k], + false, + mode_lib->vba.Output[k], + mode_lib->vba.OutputFormat[k], + mode_lib->vba.DSCInputBitPerComponent[k]); + } else if (mode_lib->vba.Output[k] == dm_dp + || mode_lib->vba.Output[k] == dm_edp) { + if (mode_lib->vba.Output[k] == dm_edp) { + mode_lib->vba.EffectiveFECOverhead = 0.0; + } else { + mode_lib->vba.EffectiveFECOverhead = + mode_lib->vba.FECOverhead; + } + if (mode_lib->vba.PHYCLKPerState[i] >= 270.0) { + mode_lib->vba.Outbpp = TruncToValidBPP( + (1.0 - mode_lib->vba.Downspreading / 100.0) * 270.0 + * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0, + mode_lib->vba.ForcedOutputLinkBPP[k], + false, + mode_lib->vba.Output[k], + mode_lib->vba.OutputFormat[k], + mode_lib->vba.DSCInputBitPerComponent[k]); + mode_lib->vba.OutbppDSC = TruncToValidBPP( + (1.0 - mode_lib->vba.Downspreading / 100.0) * (1.0 - mode_lib->vba.EffectiveFECOverhead / 100.0) * 270.0 + * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0, + mode_lib->vba.ForcedOutputLinkBPP[k], + true, + mode_lib->vba.Output[k], + mode_lib->vba.OutputFormat[k], + mode_lib->vba.DSCInputBitPerComponent[k]); + if (mode_lib->vba.DSCEnabled[k] == true) { + locals->RequiresDSC[i][k] = true; + if (mode_lib->vba.Output[k] == dm_dp) { + locals->RequiresFEC[i][k] = true; + } else { + locals->RequiresFEC[i][k] = false; + } + mode_lib->vba.Outbpp = mode_lib->vba.OutbppDSC; + } else { + locals->RequiresDSC[i][k] = false; + locals->RequiresFEC[i][k] = false; + } + locals->OutputBppPerState[i][k] = mode_lib->vba.Outbpp; + } + if (mode_lib->vba.Outbpp == BPP_INVALID && mode_lib->vba.PHYCLKPerState[i] >= 540.0) { + mode_lib->vba.Outbpp = TruncToValidBPP( + (1.0 - mode_lib->vba.Downspreading / 100.0) * 540.0 + * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0, + mode_lib->vba.ForcedOutputLinkBPP[k], + false, + mode_lib->vba.Output[k], + mode_lib->vba.OutputFormat[k], + mode_lib->vba.DSCInputBitPerComponent[k]); + mode_lib->vba.OutbppDSC = TruncToValidBPP( + (1.0 - mode_lib->vba.Downspreading / 100.0) * (1.0 - mode_lib->vba.EffectiveFECOverhead / 100.0) * 540.0 + * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0, + mode_lib->vba.ForcedOutputLinkBPP[k], + true, + mode_lib->vba.Output[k], + mode_lib->vba.OutputFormat[k], + mode_lib->vba.DSCInputBitPerComponent[k]); + if (mode_lib->vba.DSCEnabled[k] == true) { + locals->RequiresDSC[i][k] = true; + if (mode_lib->vba.Output[k] == dm_dp) { + locals->RequiresFEC[i][k] = true; + } else { + locals->RequiresFEC[i][k] = false; + } + mode_lib->vba.Outbpp = mode_lib->vba.OutbppDSC; + } else { + locals->RequiresDSC[i][k] = false; + locals->RequiresFEC[i][k] = false; + } + locals->OutputBppPerState[i][k] = mode_lib->vba.Outbpp; + } + if (mode_lib->vba.Outbpp == BPP_INVALID + && mode_lib->vba.PHYCLKPerState[i] + >= 810.0) { + mode_lib->vba.Outbpp = TruncToValidBPP( + (1.0 - mode_lib->vba.Downspreading / 100.0) * 810.0 + * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0, + mode_lib->vba.ForcedOutputLinkBPP[k], + false, + mode_lib->vba.Output[k], + mode_lib->vba.OutputFormat[k], + mode_lib->vba.DSCInputBitPerComponent[k]); + mode_lib->vba.OutbppDSC = TruncToValidBPP( + (1.0 - mode_lib->vba.Downspreading / 100.0) * (1.0 - mode_lib->vba.EffectiveFECOverhead / 100.0) * 810.0 + * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0, + mode_lib->vba.ForcedOutputLinkBPP[k], + true, + mode_lib->vba.Output[k], + mode_lib->vba.OutputFormat[k], + mode_lib->vba.DSCInputBitPerComponent[k]); + if (mode_lib->vba.DSCEnabled[k] == true || mode_lib->vba.Outbpp == BPP_INVALID) { + locals->RequiresDSC[i][k] = true; + if (mode_lib->vba.Output[k] == dm_dp) { + locals->RequiresFEC[i][k] = true; + } else { + locals->RequiresFEC[i][k] = false; + } + mode_lib->vba.Outbpp = mode_lib->vba.OutbppDSC; + } else { + locals->RequiresDSC[i][k] = false; + locals->RequiresFEC[i][k] = false; + } + locals->OutputBppPerState[i][k] = + mode_lib->vba.Outbpp; + } + } + } else { + locals->OutputBppPerState[i][k] = BPP_BLENDED_PIPE; + } + } + } + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { + locals->DIOSupport[i] = true; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (!mode_lib->vba.skip_dio_check[k] + && (locals->OutputBppPerState[i][k] == BPP_INVALID + || (mode_lib->vba.OutputFormat[k] == dm_420 + && mode_lib->vba.Interlace[k] == true + && mode_lib->vba.ProgressiveToInterlaceUnitInOPP == true))) { + locals->DIOSupport[i] = false; + } + } + } + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + locals->DSCCLKRequiredMoreThanSupported[i] = false; + if (mode_lib->vba.BlendingAndTiming[k] == k) { + if ((mode_lib->vba.Output[k] == dm_dp + || mode_lib->vba.Output[k] == dm_edp)) { + if (mode_lib->vba.OutputFormat[k] == dm_420 + || mode_lib->vba.OutputFormat[k] + == dm_n422) { + mode_lib->vba.DSCFormatFactor = 2; + } else { + mode_lib->vba.DSCFormatFactor = 1; + } + if (locals->RequiresDSC[i][k] == true) { + if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { + if (mode_lib->vba.PixelClockBackEnd[k] / 6.0 / mode_lib->vba.DSCFormatFactor + > (1.0 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * mode_lib->vba.MaxDSCCLK[i]) { + locals->DSCCLKRequiredMoreThanSupported[i] = + true; + } + } else { + if (mode_lib->vba.PixelClockBackEnd[k] / 3.0 / mode_lib->vba.DSCFormatFactor + > (1.0 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * mode_lib->vba.MaxDSCCLK[i]) { + locals->DSCCLKRequiredMoreThanSupported[i] = + true; + } + } + } + } + } + } + } + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { + locals->NotEnoughDSCUnits[i] = false; + mode_lib->vba.TotalDSCUnitsRequired = 0.0; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (locals->RequiresDSC[i][k] == true) { + if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { + mode_lib->vba.TotalDSCUnitsRequired = + mode_lib->vba.TotalDSCUnitsRequired + 2.0; + } else { + mode_lib->vba.TotalDSCUnitsRequired = + mode_lib->vba.TotalDSCUnitsRequired + 1.0; + } + } + } + if (mode_lib->vba.TotalDSCUnitsRequired > mode_lib->vba.NumberOfDSC) { + locals->NotEnoughDSCUnits[i] = true; + } + } + /*DSC Delay per state*/ + + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.BlendingAndTiming[k] != k) { + mode_lib->vba.slices = 0; + } else if (locals->RequiresDSC[i][k] == 0 + || locals->RequiresDSC[i][k] == false) { + mode_lib->vba.slices = 0; + } else if (mode_lib->vba.PixelClockBackEnd[k] > 3200.0) { + mode_lib->vba.slices = dml_ceil( + mode_lib->vba.PixelClockBackEnd[k] / 400.0, + 4.0); + } else if (mode_lib->vba.PixelClockBackEnd[k] > 1360.0) { + mode_lib->vba.slices = 8.0; + } else if (mode_lib->vba.PixelClockBackEnd[k] > 680.0) { + mode_lib->vba.slices = 4.0; + } else if (mode_lib->vba.PixelClockBackEnd[k] > 340.0) { + mode_lib->vba.slices = 2.0; + } else { + mode_lib->vba.slices = 1.0; + } + if (locals->OutputBppPerState[i][k] == BPP_BLENDED_PIPE + || locals->OutputBppPerState[i][k] == BPP_INVALID) { + mode_lib->vba.bpp = 0.0; + } else { + mode_lib->vba.bpp = locals->OutputBppPerState[i][k]; + } + if (locals->RequiresDSC[i][k] == true && mode_lib->vba.bpp != 0.0) { + if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) { + locals->DSCDelayPerState[i][k] = + dscceComputeDelay( + mode_lib->vba.DSCInputBitPerComponent[k], + mode_lib->vba.bpp, + dml_ceil( + mode_lib->vba.HActive[k] + / mode_lib->vba.slices, + 1.0), + mode_lib->vba.slices, + mode_lib->vba.OutputFormat[k]) + + dscComputeDelay( + mode_lib->vba.OutputFormat[k]); + } else { + locals->DSCDelayPerState[i][k] = + 2.0 * (dscceComputeDelay( + mode_lib->vba.DSCInputBitPerComponent[k], + mode_lib->vba.bpp, + dml_ceil(mode_lib->vba.HActive[k] / mode_lib->vba.slices, 1.0), + mode_lib->vba.slices / 2, + mode_lib->vba.OutputFormat[k]) + + dscComputeDelay(mode_lib->vba.OutputFormat[k])); + } + locals->DSCDelayPerState[i][k] = + locals->DSCDelayPerState[i][k] * mode_lib->vba.PixelClock[k] / mode_lib->vba.PixelClockBackEnd[k]; + } else { + locals->DSCDelayPerState[i][k] = 0.0; + } + } + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + for (m = 0; m <= mode_lib->vba.NumberOfActivePlanes - 1; m++) { + for (j = 0; j <= mode_lib->vba.NumberOfActivePlanes - 1; j++) { + if (mode_lib->vba.BlendingAndTiming[k] == m && locals->RequiresDSC[i][m] == true) + locals->DSCDelayPerState[i][k] = locals->DSCDelayPerState[i][m]; + } + } + } + } + + //Prefetch Check + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { + for (j = 0; j < 2; j++) { + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) + locals->SwathWidthYPerState[i][j][k] = dml_min(locals->SwathWidthYSingleDPP[k], dml_round(locals->HActive[k] / 2 * locals->HRatio[k])); + else + locals->SwathWidthYPerState[i][j][k] = locals->SwathWidthYSingleDPP[k] / locals->NoOfDPP[i][j][k]; + locals->SwathWidthGranularityY = 256 / dml_ceil(locals->BytePerPixelInDETY[k], 1) / locals->MaxSwathHeightY[k]; + locals->RoundedUpMaxSwathSizeBytesY = (dml_ceil(locals->SwathWidthYPerState[i][j][k] - 1, locals->SwathWidthGranularityY) + + locals->SwathWidthGranularityY) * locals->BytePerPixelInDETY[k] * locals->MaxSwathHeightY[k]; + if (locals->SourcePixelFormat[k] == dm_420_10) { + locals->RoundedUpMaxSwathSizeBytesY = dml_ceil(locals->RoundedUpMaxSwathSizeBytesY, 256) + 256; + } + if (locals->MaxSwathHeightC[k] > 0) { + locals->SwathWidthGranularityC = 256 / dml_ceil(locals->BytePerPixelInDETC[k], 2) / locals->MaxSwathHeightC[k]; + + locals->RoundedUpMaxSwathSizeBytesC = (dml_ceil(locals->SwathWidthYPerState[i][j][k] / 2 - 1, locals->SwathWidthGranularityC) + + locals->SwathWidthGranularityC) * locals->BytePerPixelInDETC[k] * locals->MaxSwathHeightC[k]; + } + if (locals->SourcePixelFormat[k] == dm_420_10) { + locals->RoundedUpMaxSwathSizeBytesC = dml_ceil(locals->RoundedUpMaxSwathSizeBytesC, 256) + 256; + } else { + locals->RoundedUpMaxSwathSizeBytesC = 0; + } + + if (locals->RoundedUpMaxSwathSizeBytesY + locals->RoundedUpMaxSwathSizeBytesC <= locals->DETBufferSizeInKByte[0] * 1024 / 2) { + locals->SwathHeightYPerState[i][j][k] = locals->MaxSwathHeightY[k]; + locals->SwathHeightCPerState[i][j][k] = locals->MaxSwathHeightC[k]; + } else { + locals->SwathHeightYPerState[i][j][k] = locals->MinSwathHeightY[k]; + locals->SwathHeightCPerState[i][j][k] = locals->MinSwathHeightC[k]; + } + + if (locals->BytePerPixelInDETC[k] == 0) { + locals->LinesInDETLuma = locals->DETBufferSizeInKByte[0] * 1024 / locals->BytePerPixelInDETY[k] / locals->SwathWidthYPerState[i][j][k]; + locals->LinesInDETChroma = 0; + } else if (locals->SwathHeightYPerState[i][j][k] <= locals->SwathHeightCPerState[i][j][k]) { + locals->LinesInDETLuma = locals->DETBufferSizeInKByte[0] * 1024 / 2 / locals->BytePerPixelInDETY[k] / + locals->SwathWidthYPerState[i][j][k]; + locals->LinesInDETChroma = locals->DETBufferSizeInKByte[0] * 1024 / 2 / locals->BytePerPixelInDETC[k] / (locals->SwathWidthYPerState[i][j][k] / 2); + } else { + locals->LinesInDETLuma = locals->DETBufferSizeInKByte[0] * 1024 * 2 / 3 / locals->BytePerPixelInDETY[k] / locals->SwathWidthYPerState[i][j][k]; + locals->LinesInDETChroma = locals->DETBufferSizeInKByte[0] * 1024 / 3 / locals->BytePerPixelInDETY[k] / (locals->SwathWidthYPerState[i][j][k] / 2); + } + + locals->EffectiveLBLatencyHidingSourceLinesLuma = dml_min(locals->MaxLineBufferLines, + dml_floor(locals->LineBufferSize / locals->LBBitPerPixel[k] / (locals->SwathWidthYPerState[i][j][k] + / dml_max(locals->HRatio[k], 1)), 1)) - (locals->vtaps[k] - 1); + + locals->EffectiveLBLatencyHidingSourceLinesChroma = dml_min(locals->MaxLineBufferLines, + dml_floor(locals->LineBufferSize / locals->LBBitPerPixel[k] + / (locals->SwathWidthYPerState[i][j][k] / 2 + / dml_max(locals->HRatio[k] / 2, 1)), 1)) - (locals->VTAPsChroma[k] - 1); + + locals->EffectiveDETLBLinesLuma = dml_floor(locals->LinesInDETLuma + dml_min( + locals->LinesInDETLuma * locals->RequiredDISPCLK[i][j] * locals->BytePerPixelInDETY[k] * + locals->PSCL_FACTOR[k] / locals->ReturnBWPerState[i][0], + locals->EffectiveLBLatencyHidingSourceLinesLuma), + locals->SwathHeightYPerState[i][j][k]); + + + if (locals->BytePerPixelInDETC[k] == 0) { + locals->UrgentLatencySupportUsPerState[i][j][k] = locals->EffectiveDETLBLinesLuma * (locals->HTotal[k] / locals->PixelClock[k]) + / locals->VRatio[k] - locals->EffectiveDETLBLinesLuma * locals->SwathWidthYPerState[i][j][k] * + dml_ceil(locals->BytePerPixelInDETY[k], 1) / (locals->ReturnBWPerState[i][0] / locals->NoOfDPP[i][j][k]); + } else { + locals->EffectiveDETLBLinesChroma = dml_floor(locals->LinesInDETChroma + dml_min( + locals->LinesInDETChroma * locals->RequiredDISPCLK[i][j] * locals->BytePerPixelInDETC[k] * + locals->PSCL_FACTOR_CHROMA[k] / locals->ReturnBWPerState[i][0], + locals->EffectiveLBLatencyHidingSourceLinesChroma), + locals->SwathHeightCPerState[i][j][k]); + locals->UrgentLatencySupportUsPerState[i][j][k] = dml_min( + locals->EffectiveDETLBLinesLuma * (locals->HTotal[k] / locals->PixelClock[k]) + / locals->VRatio[k] - locals->EffectiveDETLBLinesLuma * locals->SwathWidthYPerState[i][j][k] * + dml_ceil(locals->BytePerPixelInDETY[k], 1) / (locals->ReturnBWPerState[i][0] / locals->NoOfDPP[i][j][k]), + locals->EffectiveDETLBLinesChroma * (locals->HTotal[k] / locals->PixelClock[k]) / (locals->VRatio[k] / 2) - + locals->EffectiveDETLBLinesChroma * locals->SwathWidthYPerState[i][j][k] / 2 * + dml_ceil(locals->BytePerPixelInDETC[k], 2) / (locals->ReturnBWPerState[i][0] / locals->NoOfDPP[i][j][k])); + } + } + } + } + + for (i = 0; i <= locals->soc.num_states; i++) { + for (j = 0; j < 2; j++) { + locals->UrgentLatencySupport[i][j] = true; + for (k = 0; k < locals->NumberOfActivePlanes; k++) { + if (locals->UrgentLatencySupportUsPerState[i][j][k] < locals->UrgentLatency) + locals->UrgentLatencySupport[i][j] = false; + } + } + } + + + /*Prefetch Check*/ + for (i = 0; i <= locals->soc.num_states; i++) { + for (j = 0; j < 2; j++) { + locals->TotalNumberOfDCCActiveDPP[i][j] = 0; + for (k = 0; k < locals->NumberOfActivePlanes; k++) { + if (locals->DCCEnable[k] == true) { + locals->TotalNumberOfDCCActiveDPP[i][j] = + locals->TotalNumberOfDCCActiveDPP[i][j] + locals->NoOfDPP[i][j][k]; + } + } + } + } + + CalculateMinAndMaxPrefetchMode(locals->AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, &locals->MinPrefetchMode, &locals->MaxPrefetchMode); + + locals->MaxTotalVActiveRDBandwidth = 0; + for (k = 0; k < locals->NumberOfActivePlanes; k++) { + locals->MaxTotalVActiveRDBandwidth = locals->MaxTotalVActiveRDBandwidth + locals->ReadBandwidth[k]; + } + + for (i = 0; i <= locals->soc.num_states; i++) { + for (j = 0; j < 2; j++) { + for (k = 0; k < locals->NumberOfActivePlanes; k++) { + locals->NoOfDPPThisState[k] = locals->NoOfDPP[i][j][k]; + locals->RequiredDPPCLKThisState[k] = locals->RequiredDPPCLK[i][j][k]; + locals->SwathHeightYThisState[k] = locals->SwathHeightYPerState[i][j][k]; + locals->SwathHeightCThisState[k] = locals->SwathHeightCPerState[i][j][k]; + locals->SwathWidthYThisState[k] = locals->SwathWidthYPerState[i][j][k]; + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] = dml_max( + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], + mode_lib->vba.PixelClock[k] / 16.0); + if (mode_lib->vba.BytePerPixelInDETC[k] == 0.0) { + if (mode_lib->vba.VRatio[k] <= 1.0) { + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] = + dml_max( + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], + 1.1 + * dml_ceil( + mode_lib->vba.BytePerPixelInDETY[k], + 1.0) + / 64.0 + * mode_lib->vba.HRatio[k] + * mode_lib->vba.PixelClock[k] + / mode_lib->vba.NoOfDPP[i][j][k]); + } else { + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] = + dml_max( + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], + 1.1 + * dml_ceil( + mode_lib->vba.BytePerPixelInDETY[k], + 1.0) + / 64.0 + * mode_lib->vba.PSCL_FACTOR[k] + * mode_lib->vba.RequiredDPPCLK[i][j][k]); + } + } else { + if (mode_lib->vba.VRatio[k] <= 1.0) { + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] = + dml_max( + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], + 1.1 + * dml_ceil( + mode_lib->vba.BytePerPixelInDETY[k], + 1.0) + / 32.0 + * mode_lib->vba.HRatio[k] + * mode_lib->vba.PixelClock[k] + / mode_lib->vba.NoOfDPP[i][j][k]); + } else { + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] = + dml_max( + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], + 1.1 + * dml_ceil( + mode_lib->vba.BytePerPixelInDETY[k], + 1.0) + / 32.0 + * mode_lib->vba.PSCL_FACTOR[k] + * mode_lib->vba.RequiredDPPCLK[i][j][k]); + } + if (mode_lib->vba.VRatio[k] / 2.0 <= 1.0) { + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] = + dml_max( + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], + 1.1 + * dml_ceil( + mode_lib->vba.BytePerPixelInDETC[k], + 2.0) + / 32.0 + * mode_lib->vba.HRatio[k] + / 2.0 + * mode_lib->vba.PixelClock[k] + / mode_lib->vba.NoOfDPP[i][j][k]); + } else { + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] = + dml_max( + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], + 1.1 + * dml_ceil( + mode_lib->vba.BytePerPixelInDETC[k], + 2.0) + / 32.0 + * mode_lib->vba.PSCL_FACTOR_CHROMA[k] + * mode_lib->vba.RequiredDPPCLK[i][j][k]); + } + } + } + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + mode_lib->vba.PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes( + mode_lib, + mode_lib->vba.DCCEnable[k], + mode_lib->vba.Read256BlockHeightY[k], + mode_lib->vba.Read256BlockWidthY[k], + mode_lib->vba.SourcePixelFormat[k], + mode_lib->vba.SurfaceTiling[k], + dml_ceil(mode_lib->vba.BytePerPixelInDETY[k], 1.0), + mode_lib->vba.SourceScan[k], + mode_lib->vba.ViewportWidth[k], + mode_lib->vba.ViewportHeight[k], + mode_lib->vba.SwathWidthYPerState[i][j][k], + mode_lib->vba.GPUVMEnable, + mode_lib->vba.VMMPageSize, + mode_lib->vba.PTEBufferSizeInRequestsLuma, + mode_lib->vba.PDEProcessingBufIn64KBReqs, + mode_lib->vba.PitchY[k], + mode_lib->vba.DCCMetaPitchY[k], + &mode_lib->vba.MacroTileWidthY[k], + &mode_lib->vba.MetaRowBytesY, + &mode_lib->vba.DPTEBytesPerRowY, + &mode_lib->vba.PTEBufferSizeNotExceededY[i][j][k], + &mode_lib->vba.dpte_row_height[k], + &mode_lib->vba.meta_row_height[k]); + mode_lib->vba.PrefetchLinesY[0][0][k] = CalculatePrefetchSourceLines( + mode_lib, + mode_lib->vba.VRatio[k], + mode_lib->vba.vtaps[k], + mode_lib->vba.Interlace[k], + mode_lib->vba.ProgressiveToInterlaceUnitInOPP, + mode_lib->vba.SwathHeightYPerState[i][j][k], + mode_lib->vba.ViewportYStartY[k], + &mode_lib->vba.PrefillY[k], + &mode_lib->vba.MaxNumSwY[k]); + if ((mode_lib->vba.SourcePixelFormat[k] != dm_444_64 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_32 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_16 + && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16 + && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8)) { + mode_lib->vba.PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes( + mode_lib, + mode_lib->vba.DCCEnable[k], + mode_lib->vba.Read256BlockHeightY[k], + mode_lib->vba.Read256BlockWidthY[k], + mode_lib->vba.SourcePixelFormat[k], + mode_lib->vba.SurfaceTiling[k], + dml_ceil(mode_lib->vba.BytePerPixelInDETC[k], 2.0), + mode_lib->vba.SourceScan[k], + mode_lib->vba.ViewportWidth[k] / 2.0, + mode_lib->vba.ViewportHeight[k] / 2.0, + mode_lib->vba.SwathWidthYPerState[i][j][k] / 2.0, + mode_lib->vba.GPUVMEnable, + mode_lib->vba.VMMPageSize, + mode_lib->vba.PTEBufferSizeInRequestsLuma, + mode_lib->vba.PDEProcessingBufIn64KBReqs, + mode_lib->vba.PitchC[k], + 0.0, + &mode_lib->vba.MacroTileWidthC[k], + &mode_lib->vba.MetaRowBytesC, + &mode_lib->vba.DPTEBytesPerRowC, + &mode_lib->vba.PTEBufferSizeNotExceededC[i][j][k], + &mode_lib->vba.dpte_row_height_chroma[k], + &mode_lib->vba.meta_row_height_chroma[k]); + mode_lib->vba.PrefetchLinesC[0][0][k] = CalculatePrefetchSourceLines( + mode_lib, + mode_lib->vba.VRatio[k] / 2.0, + mode_lib->vba.VTAPsChroma[k], + mode_lib->vba.Interlace[k], + mode_lib->vba.ProgressiveToInterlaceUnitInOPP, + mode_lib->vba.SwathHeightCPerState[i][j][k], + mode_lib->vba.ViewportYStartC[k], + &mode_lib->vba.PrefillC[k], + &mode_lib->vba.MaxNumSwC[k]); + } else { + mode_lib->vba.PDEAndMetaPTEBytesPerFrameC = 0.0; + mode_lib->vba.MetaRowBytesC = 0.0; + mode_lib->vba.DPTEBytesPerRowC = 0.0; + locals->PrefetchLinesC[0][0][k] = 0.0; + locals->PTEBufferSizeNotExceededC[i][j][k] = true; + locals->PTEBufferSizeInRequestsForLuma = mode_lib->vba.PTEBufferSizeInRequestsLuma + mode_lib->vba.PTEBufferSizeInRequestsChroma; + } + locals->PDEAndMetaPTEBytesPerFrame[0][0][k] = + mode_lib->vba.PDEAndMetaPTEBytesPerFrameY + mode_lib->vba.PDEAndMetaPTEBytesPerFrameC; + locals->MetaRowBytes[0][0][k] = mode_lib->vba.MetaRowBytesY + mode_lib->vba.MetaRowBytesC; + locals->DPTEBytesPerRow[0][0][k] = mode_lib->vba.DPTEBytesPerRowY + mode_lib->vba.DPTEBytesPerRowC; + + CalculateActiveRowBandwidth( + mode_lib->vba.GPUVMEnable, + mode_lib->vba.SourcePixelFormat[k], + mode_lib->vba.VRatio[k], + mode_lib->vba.DCCEnable[k], + mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k], + mode_lib->vba.MetaRowBytesY, + mode_lib->vba.MetaRowBytesC, + mode_lib->vba.meta_row_height[k], + mode_lib->vba.meta_row_height_chroma[k], + mode_lib->vba.DPTEBytesPerRowY, + mode_lib->vba.DPTEBytesPerRowC, + mode_lib->vba.dpte_row_height[k], + mode_lib->vba.dpte_row_height_chroma[k], + &mode_lib->vba.meta_row_bw[k], + &mode_lib->vba.dpte_row_bw[k], + &mode_lib->vba.qual_row_bw[k]); + } + mode_lib->vba.ExtraLatency = + mode_lib->vba.UrgentRoundTripAndOutOfOrderLatencyPerState[i] + + (mode_lib->vba.TotalNumberOfActiveDPP[i][j] + * mode_lib->vba.PixelChunkSizeInKByte + + mode_lib->vba.TotalNumberOfDCCActiveDPP[i][j] + * mode_lib->vba.MetaChunkSize) + * 1024.0 + / mode_lib->vba.ReturnBWPerState[i][0]; + if (mode_lib->vba.GPUVMEnable == true) { + mode_lib->vba.ExtraLatency = mode_lib->vba.ExtraLatency + + mode_lib->vba.TotalNumberOfActiveDPP[i][j] + * mode_lib->vba.PTEGroupSize + / mode_lib->vba.ReturnBWPerState[i][0]; + } + mode_lib->vba.TimeCalc = 24.0 / mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0]; + + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.BlendingAndTiming[k] == k) { + if (mode_lib->vba.WritebackEnable[k] == true) { + locals->WritebackDelay[i][k] = mode_lib->vba.WritebackLatency + + CalculateWriteBackDelay( + mode_lib->vba.WritebackPixelFormat[k], + mode_lib->vba.WritebackHRatio[k], + mode_lib->vba.WritebackVRatio[k], + mode_lib->vba.WritebackLumaHTaps[k], + mode_lib->vba.WritebackLumaVTaps[k], + mode_lib->vba.WritebackChromaHTaps[k], + mode_lib->vba.WritebackChromaVTaps[k], + mode_lib->vba.WritebackDestinationWidth[k]) / locals->RequiredDISPCLK[i][j]; + } else { + locals->WritebackDelay[i][k] = 0.0; + } + for (m = 0; m <= mode_lib->vba.NumberOfActivePlanes - 1; m++) { + if (mode_lib->vba.BlendingAndTiming[m] == k + && mode_lib->vba.WritebackEnable[m] + == true) { + locals->WritebackDelay[i][k] = dml_max(locals->WritebackDelay[i][k], + mode_lib->vba.WritebackLatency + CalculateWriteBackDelay( + mode_lib->vba.WritebackPixelFormat[m], + mode_lib->vba.WritebackHRatio[m], + mode_lib->vba.WritebackVRatio[m], + mode_lib->vba.WritebackLumaHTaps[m], + mode_lib->vba.WritebackLumaVTaps[m], + mode_lib->vba.WritebackChromaHTaps[m], + mode_lib->vba.WritebackChromaVTaps[m], + mode_lib->vba.WritebackDestinationWidth[m]) / locals->RequiredDISPCLK[i][j]); + } + } + } + } + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + for (m = 0; m <= mode_lib->vba.NumberOfActivePlanes - 1; m++) { + if (mode_lib->vba.BlendingAndTiming[k] == m) { + locals->WritebackDelay[i][k] = locals->WritebackDelay[i][m]; + } + } + } + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + for (m = 0; m < locals->NumberOfCursors[k]; m++) + locals->cursor_bw[k] = locals->NumberOfCursors[k] * locals->CursorWidth[k][m] * locals->CursorBPP[k][m] + / 8 / (locals->HTotal[k] / locals->PixelClock[k]) * locals->VRatio[k]; + } + + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + locals->MaximumVStartup[0][0][k] = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k] + - dml_max(1.0, dml_ceil(locals->WritebackDelay[i][k] / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]), 1.0)); + } + + mode_lib->vba.NextPrefetchMode = mode_lib->vba.MinPrefetchMode; + do { + mode_lib->vba.PrefetchMode[i][j] = mode_lib->vba.NextPrefetchMode; + mode_lib->vba.NextPrefetchMode = mode_lib->vba.NextPrefetchMode + 1; + + mode_lib->vba.TWait = CalculateTWait( + mode_lib->vba.PrefetchMode[i][j], + mode_lib->vba.DRAMClockChangeLatency, + mode_lib->vba.UrgentLatency, + mode_lib->vba.SREnterPlusExitTime); + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + + if (mode_lib->vba.XFCEnabled[k] == true) { + mode_lib->vba.XFCRemoteSurfaceFlipDelay = + CalculateRemoteSurfaceFlipDelay( + mode_lib, + mode_lib->vba.VRatio[k], + locals->SwathWidthYPerState[i][j][k], + dml_ceil(locals->BytePerPixelInDETY[k], 1.0), + mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k], + mode_lib->vba.XFCTSlvVupdateOffset, + mode_lib->vba.XFCTSlvVupdateWidth, + mode_lib->vba.XFCTSlvVreadyOffset, + mode_lib->vba.XFCXBUFLatencyTolerance, + mode_lib->vba.XFCFillBWOverhead, + mode_lib->vba.XFCSlvChunkSize, + mode_lib->vba.XFCBusTransportTime, + mode_lib->vba.TimeCalc, + mode_lib->vba.TWait, + &mode_lib->vba.SrcActiveDrainRate, + &mode_lib->vba.TInitXFill, + &mode_lib->vba.TslvChk); + } else { + mode_lib->vba.XFCRemoteSurfaceFlipDelay = 0.0; + } + + CalculateDelayAfterScaler(mode_lib, mode_lib->vba.ReturnBWPerState[i][0], mode_lib->vba.ReadBandwidthLuma[k], mode_lib->vba.ReadBandwidthChroma[k], mode_lib->vba.MaxTotalVActiveRDBandwidth, + mode_lib->vba.DisplayPipeLineDeliveryTimeLuma[k], mode_lib->vba.DisplayPipeLineDeliveryTimeChroma[k], + mode_lib->vba.RequiredDPPCLK[i][j][k], mode_lib->vba.RequiredDISPCLK[i][j], mode_lib->vba.PixelClock[k], mode_lib->vba.DSCDelayPerState[i][k], mode_lib->vba.NoOfDPP[i][j][k], mode_lib->vba.ScalerEnabled[k], mode_lib->vba.NumberOfCursors[k], + mode_lib->vba.DPPCLKDelaySubtotal, mode_lib->vba.DPPCLKDelaySCL, mode_lib->vba.DPPCLKDelaySCLLBOnly, mode_lib->vba.DPPCLKDelayCNVCFormater, mode_lib->vba.DPPCLKDelayCNVCCursor, mode_lib->vba.DISPCLKDelaySubtotal, + mode_lib->vba.SwathWidthYPerState[i][j][k] / mode_lib->vba.HRatio[k], mode_lib->vba.OutputFormat[k], mode_lib->vba.HTotal[k], + mode_lib->vba.SwathWidthYSingleDPP[k], mode_lib->vba.BytePerPixelInDETY[k], mode_lib->vba.BytePerPixelInDETC[k], mode_lib->vba.SwathHeightYThisState[k], mode_lib->vba.SwathHeightCThisState[k], mode_lib->vba.Interlace[k], mode_lib->vba.ProgressiveToInterlaceUnitInOPP, + &mode_lib->vba.DSTXAfterScaler[k], &mode_lib->vba.DSTYAfterScaler[k]); + + mode_lib->vba.IsErrorResult[i][j][k] = + CalculatePrefetchSchedule( + mode_lib, + mode_lib->vba.RequiredDPPCLK[i][j][k], + mode_lib->vba.RequiredDISPCLK[i][j], + mode_lib->vba.PixelClock[k], + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], + mode_lib->vba.NoOfDPP[i][j][k], + mode_lib->vba.NumberOfCursors[k], + mode_lib->vba.VTotal[k] + - mode_lib->vba.VActive[k], + mode_lib->vba.HTotal[k], + mode_lib->vba.MaxInterDCNTileRepeaters, + mode_lib->vba.MaximumVStartup[0][0][k], + mode_lib->vba.GPUVMMaxPageTableLevels, + mode_lib->vba.GPUVMEnable, + mode_lib->vba.DynamicMetadataEnable[k], + mode_lib->vba.DynamicMetadataLinesBeforeActiveRequired[k], + mode_lib->vba.DynamicMetadataTransmittedBytes[k], + mode_lib->vba.DCCEnable[k], + mode_lib->vba.UrgentLatencyPixelDataOnly, + mode_lib->vba.ExtraLatency, + mode_lib->vba.TimeCalc, + mode_lib->vba.PDEAndMetaPTEBytesPerFrame[0][0][k], + mode_lib->vba.MetaRowBytes[0][0][k], + mode_lib->vba.DPTEBytesPerRow[0][0][k], + mode_lib->vba.PrefetchLinesY[0][0][k], + mode_lib->vba.SwathWidthYPerState[i][j][k], + mode_lib->vba.BytePerPixelInDETY[k], + mode_lib->vba.PrefillY[k], + mode_lib->vba.MaxNumSwY[k], + mode_lib->vba.PrefetchLinesC[0][0][k], + mode_lib->vba.BytePerPixelInDETC[k], + mode_lib->vba.PrefillC[k], + mode_lib->vba.MaxNumSwC[k], + mode_lib->vba.SwathHeightYPerState[i][j][k], + mode_lib->vba.SwathHeightCPerState[i][j][k], + mode_lib->vba.TWait, + mode_lib->vba.XFCEnabled[k], + mode_lib->vba.XFCRemoteSurfaceFlipDelay, + mode_lib->vba.Interlace[k], + mode_lib->vba.ProgressiveToInterlaceUnitInOPP, + mode_lib->vba.DSTXAfterScaler[k], + mode_lib->vba.DSTYAfterScaler[k], + &mode_lib->vba.LineTimesForPrefetch[k], + &mode_lib->vba.PrefetchBW[k], + &mode_lib->vba.LinesForMetaPTE[k], + &mode_lib->vba.LinesForMetaAndDPTERow[k], + &mode_lib->vba.VRatioPreY[i][j][k], + &mode_lib->vba.VRatioPreC[i][j][k], + &mode_lib->vba.RequiredPrefetchPixelDataBWLuma[i][j][k], + &mode_lib->vba.Tno_bw[k], + &mode_lib->vba.VUpdateOffsetPix[k], + &mode_lib->vba.VUpdateWidthPix[k], + &mode_lib->vba.VReadyOffsetPix[k]); + } + mode_lib->vba.MaximumReadBandwidthWithoutPrefetch = 0.0; + mode_lib->vba.MaximumReadBandwidthWithPrefetch = 0.0; + locals->prefetch_vm_bw_valid = true; + locals->prefetch_row_bw_valid = true; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (locals->PDEAndMetaPTEBytesPerFrame[0][0][k] == 0) + locals->prefetch_vm_bw[k] = 0; + else if (locals->LinesForMetaPTE[k] > 0) + locals->prefetch_vm_bw[k] = locals->PDEAndMetaPTEBytesPerFrame[0][0][k] + / (locals->LinesForMetaPTE[k] * locals->HTotal[k] / locals->PixelClock[k]); + else { + locals->prefetch_vm_bw[k] = 0; + locals->prefetch_vm_bw_valid = false; + } + if (locals->MetaRowBytes[0][0][k] + locals->DPTEBytesPerRow[0][0][k] == 0) + locals->prefetch_row_bw[k] = 0; + else if (locals->LinesForMetaAndDPTERow[k] > 0) + locals->prefetch_row_bw[k] = (locals->MetaRowBytes[0][0][k] + locals->DPTEBytesPerRow[0][0][k]) + / (locals->LinesForMetaAndDPTERow[k] * locals->HTotal[k] / locals->PixelClock[k]); + else { + locals->prefetch_row_bw[k] = 0; + locals->prefetch_row_bw_valid = false; + } + + mode_lib->vba.MaximumReadBandwidthWithoutPrefetch = mode_lib->vba.MaximumReadBandwidthWithPrefetch + + mode_lib->vba.cursor_bw[k] + mode_lib->vba.ReadBandwidth[k] + mode_lib->vba.meta_row_bw[k] + mode_lib->vba.dpte_row_bw[k]; + mode_lib->vba.MaximumReadBandwidthWithPrefetch = + mode_lib->vba.MaximumReadBandwidthWithPrefetch + + mode_lib->vba.cursor_bw[k] + + dml_max3( + mode_lib->vba.prefetch_vm_bw[k], + mode_lib->vba.prefetch_row_bw[k], + dml_max(mode_lib->vba.ReadBandwidth[k], + mode_lib->vba.RequiredPrefetchPixelDataBWLuma[i][j][k]) + + mode_lib->vba.meta_row_bw[k] + mode_lib->vba.dpte_row_bw[k]); + } + locals->BandwidthWithoutPrefetchSupported[i][0] = true; + if (mode_lib->vba.MaximumReadBandwidthWithoutPrefetch > locals->ReturnBWPerState[i][0]) { + locals->BandwidthWithoutPrefetchSupported[i][0] = false; + } + + locals->PrefetchSupported[i][j] = true; + if (mode_lib->vba.MaximumReadBandwidthWithPrefetch > locals->ReturnBWPerState[i][0]) { + locals->PrefetchSupported[i][j] = false; + } + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (locals->LineTimesForPrefetch[k] < 2.0 + || locals->LinesForMetaPTE[k] >= 8.0 + || locals->LinesForMetaAndDPTERow[k] >= 16.0 + || mode_lib->vba.IsErrorResult[i][j][k] == true) { + locals->PrefetchSupported[i][j] = false; + } + } + locals->VRatioInPrefetchSupported[i][j] = true; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (locals->VRatioPreY[i][j][k] > 4.0 + || locals->VRatioPreC[i][j][k] > 4.0 + || mode_lib->vba.IsErrorResult[i][j][k] == true) { + locals->VRatioInPrefetchSupported[i][j] = false; + } + } + } while ((locals->PrefetchSupported[i][j] != true || locals->VRatioInPrefetchSupported[i][j] != true) + && mode_lib->vba.NextPrefetchMode < mode_lib->vba.MaxPrefetchMode); + + if (mode_lib->vba.PrefetchSupported[i][j] == true + && mode_lib->vba.VRatioInPrefetchSupported[i][j] == true) { + mode_lib->vba.BandwidthAvailableForImmediateFlip = + mode_lib->vba.ReturnBWPerState[i][0]; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + mode_lib->vba.BandwidthAvailableForImmediateFlip = + mode_lib->vba.BandwidthAvailableForImmediateFlip + - mode_lib->vba.cursor_bw[k] + - dml_max( + mode_lib->vba.ReadBandwidth[k] + mode_lib->vba.qual_row_bw[k], + mode_lib->vba.PrefetchBW[k]); + } + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + mode_lib->vba.ImmediateFlipBytes[k] = 0.0; + if ((mode_lib->vba.SourcePixelFormat[k] != dm_420_8 + && mode_lib->vba.SourcePixelFormat[k] != dm_420_10)) { + mode_lib->vba.ImmediateFlipBytes[k] = + mode_lib->vba.PDEAndMetaPTEBytesPerFrame[0][0][k] + + mode_lib->vba.MetaRowBytes[0][0][k] + + mode_lib->vba.DPTEBytesPerRow[0][0][k]; + } + } + mode_lib->vba.TotImmediateFlipBytes = 0.0; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if ((mode_lib->vba.SourcePixelFormat[k] != dm_420_8 + && mode_lib->vba.SourcePixelFormat[k] != dm_420_10)) { + mode_lib->vba.TotImmediateFlipBytes = + mode_lib->vba.TotImmediateFlipBytes + + mode_lib->vba.ImmediateFlipBytes[k]; + } + } + + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + CalculateFlipSchedule( + mode_lib, + mode_lib->vba.ExtraLatency, + mode_lib->vba.UrgentLatencyPixelDataOnly, + mode_lib->vba.GPUVMMaxPageTableLevels, + mode_lib->vba.GPUVMEnable, + mode_lib->vba.BandwidthAvailableForImmediateFlip, + mode_lib->vba.TotImmediateFlipBytes, + mode_lib->vba.SourcePixelFormat[k], + mode_lib->vba.ImmediateFlipBytes[k], + mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k], + mode_lib->vba.VRatio[k], + mode_lib->vba.Tno_bw[k], + mode_lib->vba.PDEAndMetaPTEBytesPerFrame[0][0][k], + mode_lib->vba.MetaRowBytes[0][0][k], + mode_lib->vba.DPTEBytesPerRow[0][0][k], + mode_lib->vba.DCCEnable[k], + mode_lib->vba.dpte_row_height[k], + mode_lib->vba.meta_row_height[k], + mode_lib->vba.qual_row_bw[k], + &mode_lib->vba.DestinationLinesToRequestVMInImmediateFlip[k], + &mode_lib->vba.DestinationLinesToRequestRowInImmediateFlip[k], + &mode_lib->vba.final_flip_bw[k], + &mode_lib->vba.ImmediateFlipSupportedForPipe[k]); + } + mode_lib->vba.total_dcn_read_bw_with_flip = 0.0; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + mode_lib->vba.total_dcn_read_bw_with_flip = + mode_lib->vba.total_dcn_read_bw_with_flip + + mode_lib->vba.cursor_bw[k] + + dml_max3( + mode_lib->vba.prefetch_vm_bw[k], + mode_lib->vba.prefetch_row_bw[k], + mode_lib->vba.final_flip_bw[k] + + dml_max( + mode_lib->vba.ReadBandwidth[k], + mode_lib->vba.RequiredPrefetchPixelDataBWLuma[i][j][k])); + } + mode_lib->vba.ImmediateFlipSupportedForState[i][j] = true; + if (mode_lib->vba.total_dcn_read_bw_with_flip + > mode_lib->vba.ReturnBWPerState[i][0]) { + mode_lib->vba.ImmediateFlipSupportedForState[i][j] = false; + } + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.ImmediateFlipSupportedForPipe[k] == false) { + mode_lib->vba.ImmediateFlipSupportedForState[i][j] = false; + } + } + } else { + mode_lib->vba.ImmediateFlipSupportedForState[i][j] = false; + } + } + } + + /*Vertical Active BW support*/ + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { + mode_lib->vba.MaxTotalVerticalActiveAvailableBandwidth[i][0] = dml_min(mode_lib->vba.ReturnBusWidth * + mode_lib->vba.DCFCLKPerState[i], mode_lib->vba.FabricAndDRAMBandwidthPerState[i] * 1000) * + mode_lib->vba.MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation / 100; + if (mode_lib->vba.MaxTotalVActiveRDBandwidth <= mode_lib->vba.MaxTotalVerticalActiveAvailableBandwidth[i][0]) + mode_lib->vba.TotalVerticalActiveBandwidthSupport[i][0] = true; + else + mode_lib->vba.TotalVerticalActiveBandwidthSupport[i][0] = false; + } + + /*PTE Buffer Size Check*/ + + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { + for (j = 0; j < 2; j++) { + locals->PTEBufferSizeNotExceeded[i][j] = true; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (locals->PTEBufferSizeNotExceededY[i][j][k] == false + || locals->PTEBufferSizeNotExceededC[i][j][k] == false) { + locals->PTEBufferSizeNotExceeded[i][j] = false; + } + } + } + } + /*Cursor Support Check*/ + mode_lib->vba.CursorSupport = true; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + for (j = 0; j < 2; j++) { + if (mode_lib->vba.CursorWidth[k][j] > 0.0) { + if (dml_floor( + dml_floor( + mode_lib->vba.CursorBufferSize + - mode_lib->vba.CursorChunkSize, + mode_lib->vba.CursorChunkSize) * 1024.0 + / (mode_lib->vba.CursorWidth[k][j] + * mode_lib->vba.CursorBPP[k][j] + / 8.0), + 1.0) + * (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) + / mode_lib->vba.VRatio[k] < mode_lib->vba.UrgentLatencyPixelDataOnly + || (mode_lib->vba.CursorBPP[k][j] == 64.0 + && mode_lib->vba.Cursor64BppSupport == false)) { + mode_lib->vba.CursorSupport = false; + } + } + } + } + /*Valid Pitch Check*/ + + mode_lib->vba.PitchSupport = true; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + locals->AlignedYPitch[k] = dml_ceil( + dml_max(mode_lib->vba.PitchY[k], mode_lib->vba.ViewportWidth[k]), + locals->MacroTileWidthY[k]); + if (locals->AlignedYPitch[k] > mode_lib->vba.PitchY[k]) { + mode_lib->vba.PitchSupport = false; + } + if (mode_lib->vba.DCCEnable[k] == true) { + locals->AlignedDCCMetaPitch[k] = dml_ceil( + dml_max( + mode_lib->vba.DCCMetaPitchY[k], + mode_lib->vba.ViewportWidth[k]), + 64.0 * locals->Read256BlockWidthY[k]); + } else { + locals->AlignedDCCMetaPitch[k] = mode_lib->vba.DCCMetaPitchY[k]; + } + if (locals->AlignedDCCMetaPitch[k] > mode_lib->vba.DCCMetaPitchY[k]) { + mode_lib->vba.PitchSupport = false; + } + if (mode_lib->vba.SourcePixelFormat[k] != dm_444_64 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_32 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_16 + && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16 + && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8) { + locals->AlignedCPitch[k] = dml_ceil( + dml_max( + mode_lib->vba.PitchC[k], + mode_lib->vba.ViewportWidth[k] / 2.0), + locals->MacroTileWidthC[k]); + } else { + locals->AlignedCPitch[k] = mode_lib->vba.PitchC[k]; + } + if (locals->AlignedCPitch[k] > mode_lib->vba.PitchC[k]) { + mode_lib->vba.PitchSupport = false; + } + } + /*Mode Support, Voltage State and SOC Configuration*/ + + for (i = mode_lib->vba.soc.num_states; i >= 0; i--) { + for (j = 0; j < 2; j++) { + enum dm_validation_status status = DML_VALIDATION_OK; + + if (mode_lib->vba.ScaleRatioAndTapsSupport != true) { + status = DML_FAIL_SCALE_RATIO_TAP; + } else if (mode_lib->vba.SourceFormatPixelAndScanSupport != true) { + status = DML_FAIL_SOURCE_PIXEL_FORMAT; + } else if (locals->ViewportSizeSupport[i][0] != true) { + status = DML_FAIL_VIEWPORT_SIZE; + } else if (locals->DIOSupport[i] != true) { + status = DML_FAIL_DIO_SUPPORT; + } else if (locals->NotEnoughDSCUnits[i] != false) { + status = DML_FAIL_NOT_ENOUGH_DSC; + } else if (locals->DSCCLKRequiredMoreThanSupported[i] != false) { + status = DML_FAIL_DSC_CLK_REQUIRED; + } else if (locals->UrgentLatencySupport[i][j] != true) { + status = DML_FAIL_URGENT_LATENCY; + } else if (locals->ROBSupport[i][0] != true) { + status = DML_FAIL_REORDERING_BUFFER; + } else if (locals->DISPCLK_DPPCLK_Support[i][j] != true) { + status = DML_FAIL_DISPCLK_DPPCLK; + } else if (locals->TotalAvailablePipesSupport[i][j] != true) { + status = DML_FAIL_TOTAL_AVAILABLE_PIPES; + } else if (mode_lib->vba.NumberOfOTGSupport != true) { + status = DML_FAIL_NUM_OTG; + } else if (mode_lib->vba.WritebackModeSupport != true) { + status = DML_FAIL_WRITEBACK_MODE; + } else if (mode_lib->vba.WritebackLatencySupport != true) { + status = DML_FAIL_WRITEBACK_LATENCY; + } else if (mode_lib->vba.WritebackScaleRatioAndTapsSupport != true) { + status = DML_FAIL_WRITEBACK_SCALE_RATIO_TAP; + } else if (mode_lib->vba.CursorSupport != true) { + status = DML_FAIL_CURSOR_SUPPORT; + } else if (mode_lib->vba.PitchSupport != true) { + status = DML_FAIL_PITCH_SUPPORT; + } else if (locals->PrefetchSupported[i][j] != true) { + status = DML_FAIL_PREFETCH_SUPPORT; + } else if (locals->TotalVerticalActiveBandwidthSupport[i][0] != true) { + status = DML_FAIL_TOTAL_V_ACTIVE_BW; + } else if (locals->VRatioInPrefetchSupported[i][j] != true) { + status = DML_FAIL_V_RATIO_PREFETCH; + } else if (locals->PTEBufferSizeNotExceeded[i][j] != true) { + status = DML_FAIL_PTE_BUFFER_SIZE; + } else if (mode_lib->vba.NonsupportedDSCInputBPC != false) { + status = DML_FAIL_DSC_INPUT_BPC; + } + + if (status == DML_VALIDATION_OK) { + locals->ModeSupport[i][j] = true; + } else { + locals->ModeSupport[i][j] = false; + } + locals->ValidationStatus[i] = status; + } + } + { + unsigned int MaximumMPCCombine = 0; + mode_lib->vba.VoltageLevel = mode_lib->vba.soc.num_states + 1; + for (i = mode_lib->vba.VoltageOverrideLevel; i <= mode_lib->vba.soc.num_states; i++) { + if (locals->ModeSupport[i][0] == true || locals->ModeSupport[i][1] == true) { + mode_lib->vba.VoltageLevel = i; + if (locals->ModeSupport[i][1] == true && (locals->ModeSupport[i][0] == false + || mode_lib->vba.WhenToDoMPCCombine == dm_mpc_always_when_possible)) { + MaximumMPCCombine = 1; + } else { + MaximumMPCCombine = 0; + } + break; + } + } + mode_lib->vba.ImmediateFlipSupport = + locals->ImmediateFlipSupportedForState[mode_lib->vba.VoltageLevel][MaximumMPCCombine]; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + mode_lib->vba.DPPPerPlane[k] = locals->NoOfDPP[mode_lib->vba.VoltageLevel][MaximumMPCCombine][k]; + locals->DPPCLK[k] = locals->RequiredDPPCLK[mode_lib->vba.VoltageLevel][MaximumMPCCombine][k]; + } + mode_lib->vba.DISPCLK = locals->RequiredDISPCLK[mode_lib->vba.VoltageLevel][MaximumMPCCombine]; + mode_lib->vba.maxMpcComb = MaximumMPCCombine; + } + mode_lib->vba.DCFCLK = mode_lib->vba.DCFCLKPerState[mode_lib->vba.VoltageLevel]; + mode_lib->vba.DRAMSpeed = mode_lib->vba.DRAMSpeedPerState[mode_lib->vba.VoltageLevel]; + mode_lib->vba.FabricClock = mode_lib->vba.FabricClockPerState[mode_lib->vba.VoltageLevel]; + mode_lib->vba.SOCCLK = mode_lib->vba.SOCCLKPerState[mode_lib->vba.VoltageLevel]; + mode_lib->vba.ReturnBW = locals->ReturnBWPerState[mode_lib->vba.VoltageLevel][0]; + mode_lib->vba.FabricAndDRAMBandwidth = locals->FabricAndDRAMBandwidthPerState[mode_lib->vba.VoltageLevel]; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.BlendingAndTiming[k] == k) { + mode_lib->vba.ODMCombineEnabled[k] = + locals->ODMCombineEnablePerState[mode_lib->vba.VoltageLevel][k]; + } else { + mode_lib->vba.ODMCombineEnabled[k] = 0; + } + mode_lib->vba.DSCEnabled[k] = + locals->RequiresDSC[mode_lib->vba.VoltageLevel][k]; + mode_lib->vba.OutputBpp[k] = + locals->OutputBppPerState[mode_lib->vba.VoltageLevel][k]; + } +} diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.h b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.h new file mode 100644 index 000000000..a989d3ca1 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.h @@ -0,0 +1,32 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef _DCN20V2_DISPLAY_MODE_VBA_H_ +#define _DCN20V2_DISPLAY_MODE_VBA_H_ + +void dml20v2_recalculate(struct display_mode_lib *mode_lib); +void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c new file mode 100644 index 000000000..548cdef8a --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c @@ -0,0 +1,1685 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#include "../display_mode_lib.h" +#include "../display_mode_vba.h" +#include "display_rq_dlg_calc_20.h" + +// Function: dml20_rq_dlg_get_rq_params +// Calculate requestor related parameters that register definition agnostic +// (i.e. this layer does try to separate real values from register definition) +// Input: +// pipe_src_param - pipe source configuration (e.g. vp, pitch, etc.) +// Output: +// rq_param - values that can be used to setup RQ (e.g. swath_height, plane1_addr, etc.) +// +static void dml20_rq_dlg_get_rq_params( + struct display_mode_lib *mode_lib, + display_rq_params_st *rq_param, + const display_pipe_source_params_st *pipe_src_param); + +// Function: dml20_rq_dlg_get_dlg_params +// Calculate deadline related parameters +// +static void dml20_rq_dlg_get_dlg_params(struct display_mode_lib *mode_lib, + const display_e2e_pipe_params_st *e2e_pipe_param, + const unsigned int num_pipes, + const unsigned int pipe_idx, + display_dlg_regs_st *disp_dlg_regs, + display_ttu_regs_st *disp_ttu_regs, + const display_rq_dlg_params_st *rq_dlg_param, + const display_dlg_sys_params_st *dlg_sys_param, + const bool cstate_en, + const bool pstate_en); +/* + * NOTE: + * This file is gcc-parseable HW gospel, coming straight from HW engineers. + * + * It doesn't adhere to Linux kernel style and sometimes will do things in odd + * ways. Unless there is something clearly wrong with it the code should + * remain as-is as it provides us with a guarantee from HW that it is correct. + */ + +static void calculate_ttu_cursor(struct display_mode_lib *mode_lib, + double *refcyc_per_req_delivery_pre_cur, + double *refcyc_per_req_delivery_cur, + double refclk_freq_in_mhz, + double ref_freq_to_pix_freq, + double hscale_pixel_rate_l, + double hscl_ratio, + double vratio_pre_l, + double vratio_l, + unsigned int cur_width, + enum cursor_bpp cur_bpp); + +#include "../dml_inline_defs.h" + +static unsigned int get_bytes_per_element(enum source_format_class source_format, bool is_chroma) +{ + unsigned int ret_val = 0; + + if (source_format == dm_444_16) { + if (!is_chroma) + ret_val = 2; + } else if (source_format == dm_444_32) { + if (!is_chroma) + ret_val = 4; + } else if (source_format == dm_444_64) { + if (!is_chroma) + ret_val = 8; + } else if (source_format == dm_420_8) { + if (is_chroma) + ret_val = 2; + else + ret_val = 1; + } else if (source_format == dm_420_10) { + if (is_chroma) + ret_val = 4; + else + ret_val = 2; + } else if (source_format == dm_444_8) { + ret_val = 1; + } + return ret_val; +} + +static bool is_dual_plane(enum source_format_class source_format) +{ + bool ret_val = false; + + if ((source_format == dm_420_8) || (source_format == dm_420_10)) + ret_val = true; + + return ret_val; +} + +static double get_refcyc_per_delivery(struct display_mode_lib *mode_lib, + double refclk_freq_in_mhz, + double pclk_freq_in_mhz, + bool odm_combine, + unsigned int recout_width, + unsigned int hactive, + double vratio, + double hscale_pixel_rate, + unsigned int delivery_width, + unsigned int req_per_swath_ub) +{ + double refcyc_per_delivery = 0.0; + + if (vratio <= 1.0) { + if (odm_combine) + refcyc_per_delivery = (double) refclk_freq_in_mhz + * dml_min((double) recout_width, (double) hactive / 2.0) + / pclk_freq_in_mhz / (double) req_per_swath_ub; + else + refcyc_per_delivery = (double) refclk_freq_in_mhz * (double) recout_width + / pclk_freq_in_mhz / (double) req_per_swath_ub; + } else { + refcyc_per_delivery = (double) refclk_freq_in_mhz * (double) delivery_width + / (double) hscale_pixel_rate / (double) req_per_swath_ub; + } + + dml_print("DML_DLG: %s: refclk_freq_in_mhz = %3.2f\n", __func__, refclk_freq_in_mhz); + dml_print("DML_DLG: %s: pclk_freq_in_mhz = %3.2f\n", __func__, pclk_freq_in_mhz); + dml_print("DML_DLG: %s: recout_width = %d\n", __func__, recout_width); + dml_print("DML_DLG: %s: vratio = %3.2f\n", __func__, vratio); + dml_print("DML_DLG: %s: req_per_swath_ub = %d\n", __func__, req_per_swath_ub); + dml_print("DML_DLG: %s: refcyc_per_delivery= %3.2f\n", __func__, refcyc_per_delivery); + + return refcyc_per_delivery; + +} + +static unsigned int get_blk_size_bytes(const enum source_macro_tile_size tile_size) +{ + if (tile_size == dm_256k_tile) + return (256 * 1024); + else if (tile_size == dm_64k_tile) + return (64 * 1024); + else + return (4 * 1024); +} + +static void extract_rq_sizing_regs(struct display_mode_lib *mode_lib, + display_data_rq_regs_st *rq_regs, + const display_data_rq_sizing_params_st *rq_sizing) +{ + dml_print("DML_DLG: %s: rq_sizing param\n", __func__); + print__data_rq_sizing_params_st(mode_lib, rq_sizing); + + rq_regs->chunk_size = dml_log2(rq_sizing->chunk_bytes) - 10; + + if (rq_sizing->min_chunk_bytes == 0) + rq_regs->min_chunk_size = 0; + else + rq_regs->min_chunk_size = dml_log2(rq_sizing->min_chunk_bytes) - 8 + 1; + + rq_regs->meta_chunk_size = dml_log2(rq_sizing->meta_chunk_bytes) - 10; + if (rq_sizing->min_meta_chunk_bytes == 0) + rq_regs->min_meta_chunk_size = 0; + else + rq_regs->min_meta_chunk_size = dml_log2(rq_sizing->min_meta_chunk_bytes) - 6 + 1; + + rq_regs->dpte_group_size = dml_log2(rq_sizing->dpte_group_bytes) - 6; + rq_regs->mpte_group_size = dml_log2(rq_sizing->mpte_group_bytes) - 6; +} + +static void extract_rq_regs(struct display_mode_lib *mode_lib, + display_rq_regs_st *rq_regs, + const display_rq_params_st *rq_param) +{ + unsigned int detile_buf_size_in_bytes = mode_lib->ip.det_buffer_size_kbytes * 1024; + unsigned int detile_buf_plane1_addr = 0; + + extract_rq_sizing_regs(mode_lib, &(rq_regs->rq_regs_l), &rq_param->sizing.rq_l); + + rq_regs->rq_regs_l.pte_row_height_linear = dml_floor(dml_log2(rq_param->dlg.rq_l.dpte_row_height), + 1) - 3; + + if (rq_param->yuv420) { + extract_rq_sizing_regs(mode_lib, &(rq_regs->rq_regs_c), &rq_param->sizing.rq_c); + rq_regs->rq_regs_c.pte_row_height_linear = dml_floor(dml_log2(rq_param->dlg.rq_c.dpte_row_height), + 1) - 3; + } + + rq_regs->rq_regs_l.swath_height = dml_log2(rq_param->dlg.rq_l.swath_height); + rq_regs->rq_regs_c.swath_height = dml_log2(rq_param->dlg.rq_c.swath_height); + + // TODO: take the max between luma, chroma chunk size? + // okay for now, as we are setting chunk_bytes to 8kb anyways + if (rq_param->sizing.rq_l.chunk_bytes >= 32 * 1024) { //32kb + rq_regs->drq_expansion_mode = 0; + } else { + rq_regs->drq_expansion_mode = 2; + } + rq_regs->prq_expansion_mode = 1; + rq_regs->mrq_expansion_mode = 1; + rq_regs->crq_expansion_mode = 1; + + if (rq_param->yuv420) { + if ((double) rq_param->misc.rq_l.stored_swath_bytes + / (double) rq_param->misc.rq_c.stored_swath_bytes <= 1.5) { + detile_buf_plane1_addr = (detile_buf_size_in_bytes / 2.0 / 64.0); // half to chroma + } else { + detile_buf_plane1_addr = dml_round_to_multiple((unsigned int) ((2.0 * detile_buf_size_in_bytes) / 3.0), + 256, + 0) / 64.0; // 2/3 to chroma + } + } + rq_regs->plane1_base_address = detile_buf_plane1_addr; +} + +static void handle_det_buf_split(struct display_mode_lib *mode_lib, + display_rq_params_st *rq_param, + const display_pipe_source_params_st *pipe_src_param) +{ + unsigned int total_swath_bytes = 0; + unsigned int swath_bytes_l = 0; + unsigned int swath_bytes_c = 0; + unsigned int full_swath_bytes_packed_l = 0; + unsigned int full_swath_bytes_packed_c = 0; + bool req128_l = false; + bool req128_c = false; + bool surf_linear = (pipe_src_param->sw_mode == dm_sw_linear); + bool surf_vert = (pipe_src_param->source_scan == dm_vert); + unsigned int log2_swath_height_l = 0; + unsigned int log2_swath_height_c = 0; + unsigned int detile_buf_size_in_bytes = mode_lib->ip.det_buffer_size_kbytes * 1024; + + full_swath_bytes_packed_l = rq_param->misc.rq_l.full_swath_bytes; + full_swath_bytes_packed_c = rq_param->misc.rq_c.full_swath_bytes; + + if (rq_param->yuv420_10bpc) { + full_swath_bytes_packed_l = dml_round_to_multiple(rq_param->misc.rq_l.full_swath_bytes * 2 / 3, + 256, + 1) + 256; + full_swath_bytes_packed_c = dml_round_to_multiple(rq_param->misc.rq_c.full_swath_bytes * 2 / 3, + 256, + 1) + 256; + } + + if (rq_param->yuv420) { + total_swath_bytes = 2 * full_swath_bytes_packed_l + 2 * full_swath_bytes_packed_c; + + if (total_swath_bytes <= detile_buf_size_in_bytes) { //full 256b request + req128_l = false; + req128_c = false; + swath_bytes_l = full_swath_bytes_packed_l; + swath_bytes_c = full_swath_bytes_packed_c; + } else { //128b request (for luma only for yuv420 8bpc) + req128_l = true; + req128_c = false; + swath_bytes_l = full_swath_bytes_packed_l / 2; + swath_bytes_c = full_swath_bytes_packed_c; + } + // Note: assumption, the config that pass in will fit into + // the detiled buffer. + } else { + total_swath_bytes = 2 * full_swath_bytes_packed_l; + + if (total_swath_bytes <= detile_buf_size_in_bytes) + req128_l = false; + else + req128_l = true; + + swath_bytes_l = total_swath_bytes; + swath_bytes_c = 0; + } + rq_param->misc.rq_l.stored_swath_bytes = swath_bytes_l; + rq_param->misc.rq_c.stored_swath_bytes = swath_bytes_c; + + if (surf_linear) { + log2_swath_height_l = 0; + log2_swath_height_c = 0; + } else { + unsigned int swath_height_l; + unsigned int swath_height_c; + + if (!surf_vert) { + swath_height_l = rq_param->misc.rq_l.blk256_height; + swath_height_c = rq_param->misc.rq_c.blk256_height; + } else { + swath_height_l = rq_param->misc.rq_l.blk256_width; + swath_height_c = rq_param->misc.rq_c.blk256_width; + } + + if (swath_height_l > 0) + log2_swath_height_l = dml_log2(swath_height_l); + + if (req128_l && log2_swath_height_l > 0) + log2_swath_height_l -= 1; + + if (swath_height_c > 0) + log2_swath_height_c = dml_log2(swath_height_c); + + if (req128_c && log2_swath_height_c > 0) + log2_swath_height_c -= 1; + } + + rq_param->dlg.rq_l.swath_height = 1 << log2_swath_height_l; + rq_param->dlg.rq_c.swath_height = 1 << log2_swath_height_c; + + dml_print("DML_DLG: %s: req128_l = %0d\n", __func__, req128_l); + dml_print("DML_DLG: %s: req128_c = %0d\n", __func__, req128_c); + dml_print("DML_DLG: %s: full_swath_bytes_packed_l = %0d\n", + __func__, + full_swath_bytes_packed_l); + dml_print("DML_DLG: %s: full_swath_bytes_packed_c = %0d\n", + __func__, + full_swath_bytes_packed_c); +} + +static void get_meta_and_pte_attr(struct display_mode_lib *mode_lib, + display_data_rq_dlg_params_st *rq_dlg_param, + display_data_rq_misc_params_st *rq_misc_param, + display_data_rq_sizing_params_st *rq_sizing_param, + unsigned int vp_width, + unsigned int vp_height, + unsigned int data_pitch, + unsigned int meta_pitch, + unsigned int source_format, + unsigned int tiling, + unsigned int macro_tile_size, + unsigned int source_scan, + unsigned int is_chroma) +{ + bool surf_linear = (tiling == dm_sw_linear); + bool surf_vert = (source_scan == dm_vert); + + unsigned int bytes_per_element; + unsigned int bytes_per_element_y = get_bytes_per_element((enum source_format_class)(source_format), + false); + unsigned int bytes_per_element_c = get_bytes_per_element((enum source_format_class)(source_format), + true); + + unsigned int blk256_width = 0; + unsigned int blk256_height = 0; + + unsigned int blk256_width_y = 0; + unsigned int blk256_height_y = 0; + unsigned int blk256_width_c = 0; + unsigned int blk256_height_c = 0; + unsigned int log2_bytes_per_element; + unsigned int log2_blk256_width; + unsigned int log2_blk256_height; + unsigned int blk_bytes; + unsigned int log2_blk_bytes; + unsigned int log2_blk_height; + unsigned int log2_blk_width; + unsigned int log2_meta_req_bytes; + unsigned int log2_meta_req_height; + unsigned int log2_meta_req_width; + unsigned int meta_req_width; + unsigned int meta_req_height; + unsigned int log2_meta_row_height; + unsigned int meta_row_width_ub; + unsigned int log2_meta_chunk_bytes; + unsigned int log2_meta_chunk_height; + + //full sized meta chunk width in unit of data elements + unsigned int log2_meta_chunk_width; + unsigned int log2_min_meta_chunk_bytes; + unsigned int min_meta_chunk_width; + unsigned int meta_chunk_width; + unsigned int meta_chunk_per_row_int; + unsigned int meta_row_remainder; + unsigned int meta_chunk_threshold; + unsigned int meta_blk_bytes; + unsigned int meta_blk_height; + unsigned int meta_blk_width; + unsigned int meta_surface_bytes; + unsigned int vmpg_bytes; + unsigned int meta_pte_req_per_frame_ub; + unsigned int meta_pte_bytes_per_frame_ub; + const unsigned int log2_vmpg_bytes = dml_log2(mode_lib->soc.vmm_page_size_bytes); + const unsigned int dpte_buf_in_pte_reqs = mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma; + const unsigned int pde_proc_buffer_size_64k_reqs = + mode_lib->ip.pde_proc_buffer_size_64k_reqs; + + unsigned int log2_vmpg_height = 0; + unsigned int log2_vmpg_width = 0; + unsigned int log2_dpte_req_height_ptes = 0; + unsigned int log2_dpte_req_height = 0; + unsigned int log2_dpte_req_width = 0; + unsigned int log2_dpte_row_height_linear = 0; + unsigned int log2_dpte_row_height = 0; + unsigned int log2_dpte_group_width = 0; + unsigned int dpte_row_width_ub = 0; + unsigned int dpte_req_height = 0; + unsigned int dpte_req_width = 0; + unsigned int dpte_group_width = 0; + unsigned int log2_dpte_group_bytes = 0; + unsigned int log2_dpte_group_length = 0; + unsigned int pde_buf_entries; + bool yuv420 = (source_format == dm_420_8 || source_format == dm_420_10); + + Calculate256BBlockSizes((enum source_format_class)(source_format), + (enum dm_swizzle_mode)(tiling), + bytes_per_element_y, + bytes_per_element_c, + &blk256_height_y, + &blk256_height_c, + &blk256_width_y, + &blk256_width_c); + + if (!is_chroma) { + blk256_width = blk256_width_y; + blk256_height = blk256_height_y; + bytes_per_element = bytes_per_element_y; + } else { + blk256_width = blk256_width_c; + blk256_height = blk256_height_c; + bytes_per_element = bytes_per_element_c; + } + + log2_bytes_per_element = dml_log2(bytes_per_element); + + dml_print("DML_DLG: %s: surf_linear = %d\n", __func__, surf_linear); + dml_print("DML_DLG: %s: surf_vert = %d\n", __func__, surf_vert); + dml_print("DML_DLG: %s: blk256_width = %d\n", __func__, blk256_width); + dml_print("DML_DLG: %s: blk256_height = %d\n", __func__, blk256_height); + + log2_blk256_width = dml_log2((double) blk256_width); + log2_blk256_height = dml_log2((double) blk256_height); + blk_bytes = surf_linear ? + 256 : get_blk_size_bytes((enum source_macro_tile_size) macro_tile_size); + log2_blk_bytes = dml_log2((double) blk_bytes); + log2_blk_height = 0; + log2_blk_width = 0; + + // remember log rule + // "+" in log is multiply + // "-" in log is divide + // "/2" is like square root + // blk is vertical biased + if (tiling != dm_sw_linear) + log2_blk_height = log2_blk256_height + + dml_ceil((double) (log2_blk_bytes - 8) / 2.0, 1); + else + log2_blk_height = 0; // blk height of 1 + + log2_blk_width = log2_blk_bytes - log2_bytes_per_element - log2_blk_height; + + if (!surf_vert) { + rq_dlg_param->swath_width_ub = dml_round_to_multiple(vp_width - 1, blk256_width, 1) + + blk256_width; + rq_dlg_param->req_per_swath_ub = rq_dlg_param->swath_width_ub >> log2_blk256_width; + } else { + rq_dlg_param->swath_width_ub = dml_round_to_multiple(vp_height - 1, blk256_height, 1) + + blk256_height; + rq_dlg_param->req_per_swath_ub = rq_dlg_param->swath_width_ub >> log2_blk256_height; + } + + if (!surf_vert) + rq_misc_param->full_swath_bytes = rq_dlg_param->swath_width_ub * blk256_height + * bytes_per_element; + else + rq_misc_param->full_swath_bytes = rq_dlg_param->swath_width_ub * blk256_width + * bytes_per_element; + + rq_misc_param->blk256_height = blk256_height; + rq_misc_param->blk256_width = blk256_width; + + // ------- + // meta + // ------- + log2_meta_req_bytes = 6; // meta request is 64b and is 8x8byte meta element + + // each 64b meta request for dcn is 8x8 meta elements and + // a meta element covers one 256b block of the data surface. + log2_meta_req_height = log2_blk256_height + 3; // meta req is 8x8 byte, each byte represent 1 blk256 + log2_meta_req_width = log2_meta_req_bytes + 8 - log2_bytes_per_element + - log2_meta_req_height; + meta_req_width = 1 << log2_meta_req_width; + meta_req_height = 1 << log2_meta_req_height; + log2_meta_row_height = 0; + meta_row_width_ub = 0; + + // the dimensions of a meta row are meta_row_width x meta_row_height in elements. + // calculate upper bound of the meta_row_width + if (!surf_vert) { + log2_meta_row_height = log2_meta_req_height; + meta_row_width_ub = dml_round_to_multiple(vp_width - 1, meta_req_width, 1) + + meta_req_width; + rq_dlg_param->meta_req_per_row_ub = meta_row_width_ub / meta_req_width; + } else { + log2_meta_row_height = log2_meta_req_width; + meta_row_width_ub = dml_round_to_multiple(vp_height - 1, meta_req_height, 1) + + meta_req_height; + rq_dlg_param->meta_req_per_row_ub = meta_row_width_ub / meta_req_height; + } + rq_dlg_param->meta_bytes_per_row_ub = rq_dlg_param->meta_req_per_row_ub * 64; + + rq_dlg_param->meta_row_height = 1 << log2_meta_row_height; + + log2_meta_chunk_bytes = dml_log2(rq_sizing_param->meta_chunk_bytes); + log2_meta_chunk_height = log2_meta_row_height; + + //full sized meta chunk width in unit of data elements + log2_meta_chunk_width = log2_meta_chunk_bytes + 8 - log2_bytes_per_element + - log2_meta_chunk_height; + log2_min_meta_chunk_bytes = dml_log2(rq_sizing_param->min_meta_chunk_bytes); + min_meta_chunk_width = 1 + << (log2_min_meta_chunk_bytes + 8 - log2_bytes_per_element + - log2_meta_chunk_height); + meta_chunk_width = 1 << log2_meta_chunk_width; + meta_chunk_per_row_int = (unsigned int) (meta_row_width_ub / meta_chunk_width); + meta_row_remainder = meta_row_width_ub % meta_chunk_width; + meta_chunk_threshold = 0; + meta_blk_bytes = 4096; + meta_blk_height = blk256_height * 64; + meta_blk_width = meta_blk_bytes * 256 / bytes_per_element / meta_blk_height; + meta_surface_bytes = meta_pitch + * (dml_round_to_multiple(vp_height - 1, meta_blk_height, 1) + meta_blk_height) + * bytes_per_element / 256; + vmpg_bytes = mode_lib->soc.vmm_page_size_bytes; + meta_pte_req_per_frame_ub = (dml_round_to_multiple(meta_surface_bytes - vmpg_bytes, + 8 * vmpg_bytes, + 1) + 8 * vmpg_bytes) / (8 * vmpg_bytes); + meta_pte_bytes_per_frame_ub = meta_pte_req_per_frame_ub * 64; //64B mpte request + rq_dlg_param->meta_pte_bytes_per_frame_ub = meta_pte_bytes_per_frame_ub; + + dml_print("DML_DLG: %s: meta_blk_height = %d\n", __func__, meta_blk_height); + dml_print("DML_DLG: %s: meta_blk_width = %d\n", __func__, meta_blk_width); + dml_print("DML_DLG: %s: meta_surface_bytes = %d\n", __func__, meta_surface_bytes); + dml_print("DML_DLG: %s: meta_pte_req_per_frame_ub = %d\n", + __func__, + meta_pte_req_per_frame_ub); + dml_print("DML_DLG: %s: meta_pte_bytes_per_frame_ub = %d\n", + __func__, + meta_pte_bytes_per_frame_ub); + + if (!surf_vert) + meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width; + else + meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height; + + if (meta_row_remainder <= meta_chunk_threshold) + rq_dlg_param->meta_chunks_per_row_ub = meta_chunk_per_row_int + 1; + else + rq_dlg_param->meta_chunks_per_row_ub = meta_chunk_per_row_int + 2; + + // ------ + // dpte + // ------ + if (surf_linear) { + log2_vmpg_height = 0; // one line high + } else { + log2_vmpg_height = (log2_vmpg_bytes - 8) / 2 + log2_blk256_height; + } + log2_vmpg_width = log2_vmpg_bytes - log2_bytes_per_element - log2_vmpg_height; + + // only 3 possible shapes for dpte request in dimensions of ptes: 8x1, 4x2, 2x4. + if (surf_linear) { //one 64B PTE request returns 8 PTEs + log2_dpte_req_height_ptes = 0; + log2_dpte_req_width = log2_vmpg_width + 3; + log2_dpte_req_height = 0; + } else if (log2_blk_bytes == 12) { //4KB tile means 4kB page size + //one 64B req gives 8x1 PTEs for 4KB tile + log2_dpte_req_height_ptes = 0; + log2_dpte_req_width = log2_blk_width + 3; + log2_dpte_req_height = log2_blk_height + 0; + } else if ((log2_blk_bytes >= 16) && (log2_vmpg_bytes == 12)) { // tile block >= 64KB + //two 64B reqs of 2x4 PTEs give 16 PTEs to cover 64KB + log2_dpte_req_height_ptes = 4; + log2_dpte_req_width = log2_blk256_width + 4; // log2_64KB_width + log2_dpte_req_height = log2_blk256_height + 4; // log2_64KB_height + } else { //64KB page size and must 64KB tile block + //one 64B req gives 8x1 PTEs for 64KB tile + log2_dpte_req_height_ptes = 0; + log2_dpte_req_width = log2_blk_width + 3; + log2_dpte_req_height = log2_blk_height + 0; + } + + // The dpte request dimensions in data elements is dpte_req_width x dpte_req_height + // log2_vmpg_width is how much 1 pte represent, now calculating how much a 64b pte req represent + // That depends on the pte shape (i.e. 8x1, 4x2, 2x4) + //log2_dpte_req_height = log2_vmpg_height + log2_dpte_req_height_ptes; + //log2_dpte_req_width = log2_vmpg_width + log2_dpte_req_width_ptes; + dpte_req_height = 1 << log2_dpte_req_height; + dpte_req_width = 1 << log2_dpte_req_width; + + // calculate pitch dpte row buffer can hold + // round the result down to a power of two. + pde_buf_entries = yuv420 ? (pde_proc_buffer_size_64k_reqs >> 1) : pde_proc_buffer_size_64k_reqs; + if (surf_linear) { + unsigned int dpte_row_height; + + log2_dpte_row_height_linear = dml_floor(dml_log2(dml_min(64 * 1024 * pde_buf_entries + / bytes_per_element, + dpte_buf_in_pte_reqs + * dpte_req_width) + / data_pitch), + 1); + + ASSERT(log2_dpte_row_height_linear >= 3); + + if (log2_dpte_row_height_linear > 7) + log2_dpte_row_height_linear = 7; + + log2_dpte_row_height = log2_dpte_row_height_linear; + // For linear, the dpte row is pitch dependent and the pte requests wrap at the pitch boundary. + // the dpte_row_width_ub is the upper bound of data_pitch*dpte_row_height in elements with this unique buffering. + dpte_row_height = 1 << log2_dpte_row_height; + dpte_row_width_ub = dml_round_to_multiple(data_pitch * dpte_row_height - 1, + dpte_req_width, + 1) + dpte_req_width; + rq_dlg_param->dpte_req_per_row_ub = dpte_row_width_ub / dpte_req_width; + } else { + // the upper bound of the dpte_row_width without dependency on viewport position follows. + // for tiled mode, row height is the same as req height and row store up to vp size upper bound + if (!surf_vert) { + log2_dpte_row_height = log2_dpte_req_height; + dpte_row_width_ub = dml_round_to_multiple(vp_width - 1, dpte_req_width, 1) + + dpte_req_width; + rq_dlg_param->dpte_req_per_row_ub = dpte_row_width_ub / dpte_req_width; + } else { + log2_dpte_row_height = + (log2_blk_width < log2_dpte_req_width) ? + log2_blk_width : log2_dpte_req_width; + dpte_row_width_ub = dml_round_to_multiple(vp_height - 1, dpte_req_height, 1) + + dpte_req_height; + rq_dlg_param->dpte_req_per_row_ub = dpte_row_width_ub / dpte_req_height; + } + } + if (log2_blk_bytes >= 16 && log2_vmpg_bytes == 12) // tile block >= 64KB + rq_dlg_param->dpte_bytes_per_row_ub = rq_dlg_param->dpte_req_per_row_ub * 128; //2*64B dpte request + else + rq_dlg_param->dpte_bytes_per_row_ub = rq_dlg_param->dpte_req_per_row_ub * 64; //64B dpte request + + rq_dlg_param->dpte_row_height = 1 << log2_dpte_row_height; + + // the dpte_group_bytes is reduced for the specific case of vertical + // access of a tile surface that has dpte request of 8x1 ptes. + if (!surf_linear & (log2_dpte_req_height_ptes == 0) & surf_vert) //reduced, in this case, will have page fault within a group + rq_sizing_param->dpte_group_bytes = 512; + else + //full size + rq_sizing_param->dpte_group_bytes = 2048; + + //since pte request size is 64byte, the number of data pte requests per full sized group is as follows. + log2_dpte_group_bytes = dml_log2(rq_sizing_param->dpte_group_bytes); + log2_dpte_group_length = log2_dpte_group_bytes - 6; //length in 64b requests + + // full sized data pte group width in elements + if (!surf_vert) + log2_dpte_group_width = log2_dpte_group_length + log2_dpte_req_width; + else + log2_dpte_group_width = log2_dpte_group_length + log2_dpte_req_height; + + //But if the tile block >=64KB and the page size is 4KB, then each dPTE request is 2*64B + if ((log2_blk_bytes >= 16) && (log2_vmpg_bytes == 12)) // tile block >= 64KB + log2_dpte_group_width = log2_dpte_group_width - 1; + + dpte_group_width = 1 << log2_dpte_group_width; + + // since dpte groups are only aligned to dpte_req_width and not dpte_group_width, + // the upper bound for the dpte groups per row is as follows. + rq_dlg_param->dpte_groups_per_row_ub = dml_ceil((double) dpte_row_width_ub / dpte_group_width, + 1); +} + +static void get_surf_rq_param(struct display_mode_lib *mode_lib, + display_data_rq_sizing_params_st *rq_sizing_param, + display_data_rq_dlg_params_st *rq_dlg_param, + display_data_rq_misc_params_st *rq_misc_param, + const display_pipe_source_params_st *pipe_src_param, + bool is_chroma) +{ + bool mode_422 = false; + unsigned int vp_width = 0; + unsigned int vp_height = 0; + unsigned int data_pitch = 0; + unsigned int meta_pitch = 0; + unsigned int ppe = mode_422 ? 2 : 1; + + // TODO check if ppe apply for both luma and chroma in 422 case + if (is_chroma) { + vp_width = pipe_src_param->viewport_width_c / ppe; + vp_height = pipe_src_param->viewport_height_c; + data_pitch = pipe_src_param->data_pitch_c; + meta_pitch = pipe_src_param->meta_pitch_c; + } else { + vp_width = pipe_src_param->viewport_width / ppe; + vp_height = pipe_src_param->viewport_height; + data_pitch = pipe_src_param->data_pitch; + meta_pitch = pipe_src_param->meta_pitch; + } + + rq_sizing_param->chunk_bytes = 8192; + + if (rq_sizing_param->chunk_bytes == 64 * 1024) + rq_sizing_param->min_chunk_bytes = 0; + else + rq_sizing_param->min_chunk_bytes = 1024; + + rq_sizing_param->meta_chunk_bytes = 2048; + rq_sizing_param->min_meta_chunk_bytes = 256; + + rq_sizing_param->mpte_group_bytes = 2048; + + get_meta_and_pte_attr(mode_lib, + rq_dlg_param, + rq_misc_param, + rq_sizing_param, + vp_width, + vp_height, + data_pitch, + meta_pitch, + pipe_src_param->source_format, + pipe_src_param->sw_mode, + pipe_src_param->macro_tile_size, + pipe_src_param->source_scan, + is_chroma); +} + +static void dml20_rq_dlg_get_rq_params(struct display_mode_lib *mode_lib, + display_rq_params_st *rq_param, + const display_pipe_source_params_st *pipe_src_param) +{ + // get param for luma surface + rq_param->yuv420 = pipe_src_param->source_format == dm_420_8 + || pipe_src_param->source_format == dm_420_10; + rq_param->yuv420_10bpc = pipe_src_param->source_format == dm_420_10; + + get_surf_rq_param(mode_lib, + &(rq_param->sizing.rq_l), + &(rq_param->dlg.rq_l), + &(rq_param->misc.rq_l), + pipe_src_param, + 0); + + if (is_dual_plane((enum source_format_class)(pipe_src_param->source_format))) { + // get param for chroma surface + get_surf_rq_param(mode_lib, + &(rq_param->sizing.rq_c), + &(rq_param->dlg.rq_c), + &(rq_param->misc.rq_c), + pipe_src_param, + 1); + } + + // calculate how to split the det buffer space between luma and chroma + handle_det_buf_split(mode_lib, rq_param, pipe_src_param); + print__rq_params_st(mode_lib, rq_param); +} + +void dml20_rq_dlg_get_rq_reg(struct display_mode_lib *mode_lib, + display_rq_regs_st *rq_regs, + const display_pipe_params_st *pipe_param) +{ + display_rq_params_st rq_param = {0}; + + memset(rq_regs, 0, sizeof(*rq_regs)); + dml20_rq_dlg_get_rq_params(mode_lib, &rq_param, &pipe_param->src); + extract_rq_regs(mode_lib, rq_regs, &rq_param); + + print__rq_regs_st(mode_lib, rq_regs); +} + +// Note: currently taken in as is. +// Nice to decouple code from hw register implement and extract code that are repeated for luma and chroma. +static void dml20_rq_dlg_get_dlg_params(struct display_mode_lib *mode_lib, + const display_e2e_pipe_params_st *e2e_pipe_param, + const unsigned int num_pipes, + const unsigned int pipe_idx, + display_dlg_regs_st *disp_dlg_regs, + display_ttu_regs_st *disp_ttu_regs, + const display_rq_dlg_params_st *rq_dlg_param, + const display_dlg_sys_params_st *dlg_sys_param, + const bool cstate_en, + const bool pstate_en) +{ + const display_pipe_source_params_st *src = &e2e_pipe_param[pipe_idx].pipe.src; + const display_pipe_dest_params_st *dst = &e2e_pipe_param[pipe_idx].pipe.dest; + const display_output_params_st *dout = &e2e_pipe_param[pipe_idx].dout; + const display_clocks_and_cfg_st *clks = &e2e_pipe_param[pipe_idx].clks_cfg; + const scaler_ratio_depth_st *scl = &e2e_pipe_param[pipe_idx].pipe.scale_ratio_depth; + const scaler_taps_st *taps = &e2e_pipe_param[pipe_idx].pipe.scale_taps; + + // ------------------------- + // Section 1.15.2.1: OTG dependent Params + // ------------------------- + // Timing + unsigned int htotal = dst->htotal; +// unsigned int hblank_start = dst.hblank_start; // TODO: Remove + unsigned int hblank_end = dst->hblank_end; + unsigned int vblank_start = dst->vblank_start; + unsigned int vblank_end = dst->vblank_end; + unsigned int min_vblank = mode_lib->ip.min_vblank_lines; + + double dppclk_freq_in_mhz = clks->dppclk_mhz; + double dispclk_freq_in_mhz = clks->dispclk_mhz; + double refclk_freq_in_mhz = clks->refclk_mhz; + double pclk_freq_in_mhz = dst->pixel_rate_mhz; + bool interlaced = dst->interlaced; + + double ref_freq_to_pix_freq = refclk_freq_in_mhz / pclk_freq_in_mhz; + + double min_dcfclk_mhz; + double t_calc_us; + double min_ttu_vblank; + + double min_dst_y_ttu_vblank; + unsigned int dlg_vblank_start; + bool dual_plane; + bool mode_422; + unsigned int access_dir; + unsigned int vp_height_l; + unsigned int vp_width_l; + unsigned int vp_height_c; + unsigned int vp_width_c; + + // Scaling + unsigned int htaps_l; + unsigned int htaps_c; + double hratio_l; + double hratio_c; + double vratio_l; + double vratio_c; + bool scl_enable; + + double line_time_in_us; + // double vinit_l; + // double vinit_c; + // double vinit_bot_l; + // double vinit_bot_c; + + // unsigned int swath_height_l; + unsigned int swath_width_ub_l; + // unsigned int dpte_bytes_per_row_ub_l; + unsigned int dpte_groups_per_row_ub_l; + // unsigned int meta_pte_bytes_per_frame_ub_l; + // unsigned int meta_bytes_per_row_ub_l; + + // unsigned int swath_height_c; + unsigned int swath_width_ub_c; + // unsigned int dpte_bytes_per_row_ub_c; + unsigned int dpte_groups_per_row_ub_c; + + unsigned int meta_chunks_per_row_ub_l; + unsigned int meta_chunks_per_row_ub_c; + unsigned int vupdate_offset; + unsigned int vupdate_width; + unsigned int vready_offset; + + unsigned int dppclk_delay_subtotal; + unsigned int dispclk_delay_subtotal; + unsigned int pixel_rate_delay_subtotal; + + unsigned int vstartup_start; + unsigned int dst_x_after_scaler; + unsigned int dst_y_after_scaler; + double line_wait; + double dst_y_prefetch; + double dst_y_per_vm_vblank; + double dst_y_per_row_vblank; + double dst_y_per_vm_flip; + double dst_y_per_row_flip; + double min_dst_y_per_vm_vblank; + double min_dst_y_per_row_vblank; + double lsw; + double vratio_pre_l; + double vratio_pre_c; + unsigned int req_per_swath_ub_l; + unsigned int req_per_swath_ub_c; + unsigned int meta_row_height_l; + unsigned int meta_row_height_c; + unsigned int swath_width_pixels_ub_l; + unsigned int swath_width_pixels_ub_c; + unsigned int scaler_rec_in_width_l; + unsigned int scaler_rec_in_width_c; + unsigned int dpte_row_height_l; + unsigned int dpte_row_height_c; + double hscale_pixel_rate_l; + double hscale_pixel_rate_c; + double min_hratio_fact_l; + double min_hratio_fact_c; + double refcyc_per_line_delivery_pre_l; + double refcyc_per_line_delivery_pre_c; + double refcyc_per_line_delivery_l; + double refcyc_per_line_delivery_c; + + double refcyc_per_req_delivery_pre_l; + double refcyc_per_req_delivery_pre_c; + double refcyc_per_req_delivery_l; + double refcyc_per_req_delivery_c; + + unsigned int full_recout_width; + double refcyc_per_req_delivery_pre_cur0; + double refcyc_per_req_delivery_cur0; + double refcyc_per_req_delivery_pre_cur1; + double refcyc_per_req_delivery_cur1; + + memset(disp_dlg_regs, 0, sizeof(*disp_dlg_regs)); + memset(disp_ttu_regs, 0, sizeof(*disp_ttu_regs)); + + dml_print("DML_DLG: %s: cstate_en = %d\n", __func__, cstate_en); + dml_print("DML_DLG: %s: pstate_en = %d\n", __func__, pstate_en); + + dml_print("DML_DLG: %s: dppclk_freq_in_mhz = %3.2f\n", __func__, dppclk_freq_in_mhz); + dml_print("DML_DLG: %s: dispclk_freq_in_mhz = %3.2f\n", __func__, dispclk_freq_in_mhz); + dml_print("DML_DLG: %s: refclk_freq_in_mhz = %3.2f\n", __func__, refclk_freq_in_mhz); + dml_print("DML_DLG: %s: pclk_freq_in_mhz = %3.2f\n", __func__, pclk_freq_in_mhz); + dml_print("DML_DLG: %s: interlaced = %d\n", __func__, interlaced); + ASSERT(ref_freq_to_pix_freq < 4.0); + + disp_dlg_regs->ref_freq_to_pix_freq = + (unsigned int) (ref_freq_to_pix_freq * dml_pow(2, 19)); + disp_dlg_regs->refcyc_per_htotal = (unsigned int) (ref_freq_to_pix_freq * (double) htotal + * dml_pow(2, 8)); + disp_dlg_regs->dlg_vblank_end = interlaced ? (vblank_end / 2) : vblank_end; // 15 bits + disp_dlg_regs->refcyc_h_blank_end = (unsigned int) ((double) hblank_end + * (double) ref_freq_to_pix_freq); + ASSERT(disp_dlg_regs->refcyc_h_blank_end < (unsigned int) dml_pow(2, 13)); + + min_dcfclk_mhz = dlg_sys_param->deepsleep_dcfclk_mhz; + t_calc_us = get_tcalc(mode_lib, e2e_pipe_param, num_pipes); + min_ttu_vblank = get_min_ttu_vblank(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); + + min_dst_y_ttu_vblank = min_ttu_vblank * pclk_freq_in_mhz / (double) htotal; + dlg_vblank_start = interlaced ? (vblank_start / 2) : vblank_start; + + disp_dlg_regs->min_dst_y_next_start = (unsigned int) ((double) dlg_vblank_start * dml_pow(2, 2)); + ASSERT(disp_dlg_regs->min_dst_y_next_start < (unsigned int) dml_pow(2, 18)); + + dml_print("DML_DLG: %s: min_dcfclk_mhz = %3.2f\n", + __func__, + min_dcfclk_mhz); + dml_print("DML_DLG: %s: min_ttu_vblank = %3.2f\n", + __func__, + min_ttu_vblank); + dml_print("DML_DLG: %s: min_dst_y_ttu_vblank = %3.2f\n", + __func__, + min_dst_y_ttu_vblank); + dml_print("DML_DLG: %s: t_calc_us = %3.2f\n", + __func__, + t_calc_us); + dml_print("DML_DLG: %s: disp_dlg_regs->min_dst_y_next_start = 0x%0x\n", + __func__, + disp_dlg_regs->min_dst_y_next_start); + dml_print("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", + __func__, + ref_freq_to_pix_freq); + + // ------------------------- + // Section 1.15.2.2: Prefetch, Active and TTU + // ------------------------- + // Prefetch Calc + // Source +// dcc_en = src.dcc; + dual_plane = is_dual_plane((enum source_format_class)(src->source_format)); + mode_422 = false; // TODO + access_dir = (src->source_scan == dm_vert); // vp access direction: horizontal or vertical accessed +// bytes_per_element_l = get_bytes_per_element(source_format_class(src.source_format), 0); +// bytes_per_element_c = get_bytes_per_element(source_format_class(src.source_format), 1); + vp_height_l = src->viewport_height; + vp_width_l = src->viewport_width; + vp_height_c = src->viewport_height_c; + vp_width_c = src->viewport_width_c; + + // Scaling + htaps_l = taps->htaps; + htaps_c = taps->htaps_c; + hratio_l = scl->hscl_ratio; + hratio_c = scl->hscl_ratio_c; + vratio_l = scl->vscl_ratio; + vratio_c = scl->vscl_ratio_c; + scl_enable = scl->scl_enable; + + line_time_in_us = (htotal / pclk_freq_in_mhz); +// vinit_l = scl.vinit; +// vinit_c = scl.vinit_c; +// vinit_bot_l = scl.vinit_bot; +// vinit_bot_c = scl.vinit_bot_c; + +// unsigned int swath_height_l = rq_dlg_param->rq_l.swath_height; + swath_width_ub_l = rq_dlg_param->rq_l.swath_width_ub; +// unsigned int dpte_bytes_per_row_ub_l = rq_dlg_param->rq_l.dpte_bytes_per_row_ub; + dpte_groups_per_row_ub_l = rq_dlg_param->rq_l.dpte_groups_per_row_ub; +// unsigned int meta_pte_bytes_per_frame_ub_l = rq_dlg_param->rq_l.meta_pte_bytes_per_frame_ub; +// unsigned int meta_bytes_per_row_ub_l = rq_dlg_param->rq_l.meta_bytes_per_row_ub; + +// unsigned int swath_height_c = rq_dlg_param->rq_c.swath_height; + swath_width_ub_c = rq_dlg_param->rq_c.swath_width_ub; + // dpte_bytes_per_row_ub_c = rq_dlg_param->rq_c.dpte_bytes_per_row_ub; + dpte_groups_per_row_ub_c = rq_dlg_param->rq_c.dpte_groups_per_row_ub; + + meta_chunks_per_row_ub_l = rq_dlg_param->rq_l.meta_chunks_per_row_ub; + meta_chunks_per_row_ub_c = rq_dlg_param->rq_c.meta_chunks_per_row_ub; + vupdate_offset = dst->vupdate_offset; + vupdate_width = dst->vupdate_width; + vready_offset = dst->vready_offset; + + dppclk_delay_subtotal = mode_lib->ip.dppclk_delay_subtotal; + dispclk_delay_subtotal = mode_lib->ip.dispclk_delay_subtotal; + + if (scl_enable) + dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_scl; + else + dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_scl_lb_only; + + dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_cnvc_formatter + + src->num_cursors * mode_lib->ip.dppclk_delay_cnvc_cursor; + + if (dout->dsc_enable) { + double dsc_delay = get_dsc_delay(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); + + dispclk_delay_subtotal += dsc_delay; + } + + pixel_rate_delay_subtotal = dppclk_delay_subtotal * pclk_freq_in_mhz / dppclk_freq_in_mhz + + dispclk_delay_subtotal * pclk_freq_in_mhz / dispclk_freq_in_mhz; + + vstartup_start = dst->vstartup_start; + if (interlaced) { + if (vstartup_start / 2.0 + - (double) (vready_offset + vupdate_width + vupdate_offset) / htotal + <= vblank_end / 2.0) + disp_dlg_regs->vready_after_vcount0 = 1; + else + disp_dlg_regs->vready_after_vcount0 = 0; + } else { + if (vstartup_start + - (double) (vready_offset + vupdate_width + vupdate_offset) / htotal + <= vblank_end) + disp_dlg_regs->vready_after_vcount0 = 1; + else + disp_dlg_regs->vready_after_vcount0 = 0; + } + + // TODO: Where is this coming from? + if (interlaced) + vstartup_start = vstartup_start / 2; + + // TODO: What if this min_vblank doesn't match the value in the dml_config_settings.cpp? + if (vstartup_start >= min_vblank) { + dml_print("WARNING: DML_DLG: %s: vblank_start=%d vblank_end=%d\n", + __func__, + vblank_start, + vblank_end); + dml_print("WARNING: DML_DLG: %s: vstartup_start=%d should be less than min_vblank=%d\n", + __func__, + vstartup_start, + min_vblank); + min_vblank = vstartup_start + 1; + dml_print("WARNING: DML_DLG: %s: vstartup_start=%d should be less than min_vblank=%d\n", + __func__, + vstartup_start, + min_vblank); + } + + dst_x_after_scaler = get_dst_x_after_scaler(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); + dst_y_after_scaler = get_dst_y_after_scaler(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); + + dml_print("DML_DLG: %s: htotal = %d\n", __func__, htotal); + dml_print("DML_DLG: %s: pixel_rate_delay_subtotal = %d\n", + __func__, + pixel_rate_delay_subtotal); + dml_print("DML_DLG: %s: dst_x_after_scaler = %d\n", + __func__, + dst_x_after_scaler); + dml_print("DML_DLG: %s: dst_y_after_scaler = %d\n", + __func__, + dst_y_after_scaler); + + // Lwait + line_wait = mode_lib->soc.urgent_latency_us; + if (cstate_en) + line_wait = dml_max(mode_lib->soc.sr_enter_plus_exit_time_us, line_wait); + if (pstate_en) + line_wait = dml_max(mode_lib->soc.dram_clock_change_latency_us + + mode_lib->soc.urgent_latency_us, + line_wait); + line_wait = line_wait / line_time_in_us; + + dst_y_prefetch = get_dst_y_prefetch(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); + dml_print("DML_DLG: %s: dst_y_prefetch (after rnd) = %3.2f\n", __func__, dst_y_prefetch); + + dst_y_per_vm_vblank = get_dst_y_per_vm_vblank(mode_lib, + e2e_pipe_param, + num_pipes, + pipe_idx); + dst_y_per_row_vblank = get_dst_y_per_row_vblank(mode_lib, + e2e_pipe_param, + num_pipes, + pipe_idx); + dst_y_per_vm_flip = get_dst_y_per_vm_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); + dst_y_per_row_flip = get_dst_y_per_row_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); + + min_dst_y_per_vm_vblank = 8.0; + min_dst_y_per_row_vblank = 16.0; + + // magic! + if (htotal <= 75) { + min_vblank = 300; + min_dst_y_per_vm_vblank = 100.0; + min_dst_y_per_row_vblank = 100.0; + } + + dml_print("DML_DLG: %s: dst_y_per_vm_vblank = %3.2f\n", __func__, dst_y_per_vm_vblank); + dml_print("DML_DLG: %s: dst_y_per_row_vblank = %3.2f\n", __func__, dst_y_per_row_vblank); + + ASSERT(dst_y_per_vm_vblank < min_dst_y_per_vm_vblank); + ASSERT(dst_y_per_row_vblank < min_dst_y_per_row_vblank); + + ASSERT(dst_y_prefetch > (dst_y_per_vm_vblank + dst_y_per_row_vblank)); + lsw = dst_y_prefetch - (dst_y_per_vm_vblank + dst_y_per_row_vblank); + + dml_print("DML_DLG: %s: lsw = %3.2f\n", __func__, lsw); + + vratio_pre_l = get_vratio_prefetch_l(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); + vratio_pre_c = get_vratio_prefetch_c(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); + + dml_print("DML_DLG: %s: vratio_pre_l=%3.2f\n", __func__, vratio_pre_l); + dml_print("DML_DLG: %s: vratio_pre_c=%3.2f\n", __func__, vratio_pre_c); + + // Active + req_per_swath_ub_l = rq_dlg_param->rq_l.req_per_swath_ub; + req_per_swath_ub_c = rq_dlg_param->rq_c.req_per_swath_ub; + meta_row_height_l = rq_dlg_param->rq_l.meta_row_height; + meta_row_height_c = rq_dlg_param->rq_c.meta_row_height; + swath_width_pixels_ub_l = 0; + swath_width_pixels_ub_c = 0; + scaler_rec_in_width_l = 0; + scaler_rec_in_width_c = 0; + dpte_row_height_l = rq_dlg_param->rq_l.dpte_row_height; + dpte_row_height_c = rq_dlg_param->rq_c.dpte_row_height; + + if (mode_422) { + swath_width_pixels_ub_l = swath_width_ub_l * 2; // *2 for 2 pixel per element + swath_width_pixels_ub_c = swath_width_ub_c * 2; + } else { + swath_width_pixels_ub_l = swath_width_ub_l * 1; + swath_width_pixels_ub_c = swath_width_ub_c * 1; + } + + hscale_pixel_rate_l = 0.; + hscale_pixel_rate_c = 0.; + min_hratio_fact_l = 1.0; + min_hratio_fact_c = 1.0; + + if (htaps_l <= 1) + min_hratio_fact_l = 2.0; + else if (htaps_l <= 6) { + if ((hratio_l * 2.0) > 4.0) + min_hratio_fact_l = 4.0; + else + min_hratio_fact_l = hratio_l * 2.0; + } else { + if (hratio_l > 4.0) + min_hratio_fact_l = 4.0; + else + min_hratio_fact_l = hratio_l; + } + + hscale_pixel_rate_l = min_hratio_fact_l * dppclk_freq_in_mhz; + + if (htaps_c <= 1) + min_hratio_fact_c = 2.0; + else if (htaps_c <= 6) { + if ((hratio_c * 2.0) > 4.0) + min_hratio_fact_c = 4.0; + else + min_hratio_fact_c = hratio_c * 2.0; + } else { + if (hratio_c > 4.0) + min_hratio_fact_c = 4.0; + else + min_hratio_fact_c = hratio_c; + } + + hscale_pixel_rate_c = min_hratio_fact_c * dppclk_freq_in_mhz; + + refcyc_per_line_delivery_pre_l = 0.; + refcyc_per_line_delivery_pre_c = 0.; + refcyc_per_line_delivery_l = 0.; + refcyc_per_line_delivery_c = 0.; + + refcyc_per_req_delivery_pre_l = 0.; + refcyc_per_req_delivery_pre_c = 0.; + refcyc_per_req_delivery_l = 0.; + refcyc_per_req_delivery_c = 0.; + + full_recout_width = 0; + // In ODM + if (src->is_hsplit) { + // This "hack" is only allowed (and valid) for MPC combine. In ODM + // combine, you MUST specify the full_recout_width...according to Oswin + if (dst->full_recout_width == 0 && !dst->odm_combine) { + dml_print("DML_DLG: %s: Warning: full_recout_width not set in hsplit mode\n", + __func__); + full_recout_width = dst->recout_width * 2; // assume half split for dcn1 + } else + full_recout_width = dst->full_recout_width; + } else + full_recout_width = dst->recout_width; + + // As of DCN2, mpc_combine and odm_combine are mutually exclusive + refcyc_per_line_delivery_pre_l = get_refcyc_per_delivery(mode_lib, + refclk_freq_in_mhz, + pclk_freq_in_mhz, + dst->odm_combine, + full_recout_width, + dst->hactive, + vratio_pre_l, + hscale_pixel_rate_l, + swath_width_pixels_ub_l, + 1); // per line + + refcyc_per_line_delivery_l = get_refcyc_per_delivery(mode_lib, + refclk_freq_in_mhz, + pclk_freq_in_mhz, + dst->odm_combine, + full_recout_width, + dst->hactive, + vratio_l, + hscale_pixel_rate_l, + swath_width_pixels_ub_l, + 1); // per line + + dml_print("DML_DLG: %s: full_recout_width = %d\n", + __func__, + full_recout_width); + dml_print("DML_DLG: %s: hscale_pixel_rate_l = %3.2f\n", + __func__, + hscale_pixel_rate_l); + dml_print("DML_DLG: %s: refcyc_per_line_delivery_pre_l = %3.2f\n", + __func__, + refcyc_per_line_delivery_pre_l); + dml_print("DML_DLG: %s: refcyc_per_line_delivery_l = %3.2f\n", + __func__, + refcyc_per_line_delivery_l); + + if (dual_plane) { + refcyc_per_line_delivery_pre_c = get_refcyc_per_delivery(mode_lib, + refclk_freq_in_mhz, + pclk_freq_in_mhz, + dst->odm_combine, + full_recout_width, + dst->hactive, + vratio_pre_c, + hscale_pixel_rate_c, + swath_width_pixels_ub_c, + 1); // per line + + refcyc_per_line_delivery_c = get_refcyc_per_delivery(mode_lib, + refclk_freq_in_mhz, + pclk_freq_in_mhz, + dst->odm_combine, + full_recout_width, + dst->hactive, + vratio_c, + hscale_pixel_rate_c, + swath_width_pixels_ub_c, + 1); // per line + + dml_print("DML_DLG: %s: refcyc_per_line_delivery_pre_c = %3.2f\n", + __func__, + refcyc_per_line_delivery_pre_c); + dml_print("DML_DLG: %s: refcyc_per_line_delivery_c = %3.2f\n", + __func__, + refcyc_per_line_delivery_c); + } + + // TTU - Luma / Chroma + if (access_dir) { // vertical access + scaler_rec_in_width_l = vp_height_l; + scaler_rec_in_width_c = vp_height_c; + } else { + scaler_rec_in_width_l = vp_width_l; + scaler_rec_in_width_c = vp_width_c; + } + + refcyc_per_req_delivery_pre_l = get_refcyc_per_delivery(mode_lib, + refclk_freq_in_mhz, + pclk_freq_in_mhz, + dst->odm_combine, + full_recout_width, + dst->hactive, + vratio_pre_l, + hscale_pixel_rate_l, + scaler_rec_in_width_l, + req_per_swath_ub_l); // per req + refcyc_per_req_delivery_l = get_refcyc_per_delivery(mode_lib, + refclk_freq_in_mhz, + pclk_freq_in_mhz, + dst->odm_combine, + full_recout_width, + dst->hactive, + vratio_l, + hscale_pixel_rate_l, + scaler_rec_in_width_l, + req_per_swath_ub_l); // per req + + dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_l = %3.2f\n", + __func__, + refcyc_per_req_delivery_pre_l); + dml_print("DML_DLG: %s: refcyc_per_req_delivery_l = %3.2f\n", + __func__, + refcyc_per_req_delivery_l); + + ASSERT(refcyc_per_req_delivery_pre_l < dml_pow(2, 13)); + ASSERT(refcyc_per_req_delivery_l < dml_pow(2, 13)); + + if (dual_plane) { + refcyc_per_req_delivery_pre_c = get_refcyc_per_delivery(mode_lib, + refclk_freq_in_mhz, + pclk_freq_in_mhz, + dst->odm_combine, + full_recout_width, + dst->hactive, + vratio_pre_c, + hscale_pixel_rate_c, + scaler_rec_in_width_c, + req_per_swath_ub_c); // per req + refcyc_per_req_delivery_c = get_refcyc_per_delivery(mode_lib, + refclk_freq_in_mhz, + pclk_freq_in_mhz, + dst->odm_combine, + full_recout_width, + dst->hactive, + vratio_c, + hscale_pixel_rate_c, + scaler_rec_in_width_c, + req_per_swath_ub_c); // per req + + dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_c = %3.2f\n", + __func__, + refcyc_per_req_delivery_pre_c); + dml_print("DML_DLG: %s: refcyc_per_req_delivery_c = %3.2f\n", + __func__, + refcyc_per_req_delivery_c); + + ASSERT(refcyc_per_req_delivery_pre_c < dml_pow(2, 13)); + ASSERT(refcyc_per_req_delivery_c < dml_pow(2, 13)); + } + + // TTU - Cursor + refcyc_per_req_delivery_pre_cur0 = 0.0; + refcyc_per_req_delivery_cur0 = 0.0; + if (src->num_cursors > 0) { + calculate_ttu_cursor(mode_lib, + &refcyc_per_req_delivery_pre_cur0, + &refcyc_per_req_delivery_cur0, + refclk_freq_in_mhz, + ref_freq_to_pix_freq, + hscale_pixel_rate_l, + scl->hscl_ratio, + vratio_pre_l, + vratio_l, + src->cur0_src_width, + (enum cursor_bpp)(src->cur0_bpp)); + } + + refcyc_per_req_delivery_pre_cur1 = 0.0; + refcyc_per_req_delivery_cur1 = 0.0; + if (src->num_cursors > 1) { + calculate_ttu_cursor(mode_lib, + &refcyc_per_req_delivery_pre_cur1, + &refcyc_per_req_delivery_cur1, + refclk_freq_in_mhz, + ref_freq_to_pix_freq, + hscale_pixel_rate_l, + scl->hscl_ratio, + vratio_pre_l, + vratio_l, + src->cur1_src_width, + (enum cursor_bpp)(src->cur1_bpp)); + } + + // TTU - Misc + // all hard-coded + + // Assignment to register structures + disp_dlg_regs->dst_y_after_scaler = dst_y_after_scaler; // in terms of line + disp_dlg_regs->refcyc_x_after_scaler = dst_x_after_scaler * ref_freq_to_pix_freq; // in terms of refclk + ASSERT(disp_dlg_regs->refcyc_x_after_scaler < (unsigned int) dml_pow(2, 13)); + disp_dlg_regs->dst_y_prefetch = (unsigned int) (dst_y_prefetch * dml_pow(2, 2)); + disp_dlg_regs->dst_y_per_vm_vblank = (unsigned int) (dst_y_per_vm_vblank * dml_pow(2, 2)); + disp_dlg_regs->dst_y_per_row_vblank = (unsigned int) (dst_y_per_row_vblank * dml_pow(2, 2)); + disp_dlg_regs->dst_y_per_vm_flip = (unsigned int) (dst_y_per_vm_flip * dml_pow(2, 2)); + disp_dlg_regs->dst_y_per_row_flip = (unsigned int) (dst_y_per_row_flip * dml_pow(2, 2)); + + disp_dlg_regs->vratio_prefetch = (unsigned int) (vratio_pre_l * dml_pow(2, 19)); + disp_dlg_regs->vratio_prefetch_c = (unsigned int) (vratio_pre_c * dml_pow(2, 19)); + + disp_dlg_regs->refcyc_per_pte_group_vblank_l = + (unsigned int) (dst_y_per_row_vblank * (double) htotal + * ref_freq_to_pix_freq / (double) dpte_groups_per_row_ub_l); + ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_l < (unsigned int) dml_pow(2, 13)); + + if (dual_plane) { + disp_dlg_regs->refcyc_per_pte_group_vblank_c = (unsigned int) (dst_y_per_row_vblank + * (double) htotal * ref_freq_to_pix_freq + / (double) dpte_groups_per_row_ub_c); + ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_c + < (unsigned int) dml_pow(2, 13)); + } + + disp_dlg_regs->refcyc_per_meta_chunk_vblank_l = + (unsigned int) (dst_y_per_row_vblank * (double) htotal + * ref_freq_to_pix_freq / (double) meta_chunks_per_row_ub_l); + ASSERT(disp_dlg_regs->refcyc_per_meta_chunk_vblank_l < (unsigned int) dml_pow(2, 13)); + + disp_dlg_regs->refcyc_per_meta_chunk_vblank_c = + disp_dlg_regs->refcyc_per_meta_chunk_vblank_l; // dcc for 4:2:0 is not supported in dcn1.0. assigned to be the same as _l for now + + disp_dlg_regs->refcyc_per_pte_group_flip_l = (unsigned int) (dst_y_per_row_flip * htotal + * ref_freq_to_pix_freq) / dpte_groups_per_row_ub_l; + disp_dlg_regs->refcyc_per_meta_chunk_flip_l = (unsigned int) (dst_y_per_row_flip * htotal + * ref_freq_to_pix_freq) / meta_chunks_per_row_ub_l; + + if (dual_plane) { + disp_dlg_regs->refcyc_per_pte_group_flip_c = (unsigned int) (dst_y_per_row_flip + * htotal * ref_freq_to_pix_freq) / dpte_groups_per_row_ub_c; + disp_dlg_regs->refcyc_per_meta_chunk_flip_c = (unsigned int) (dst_y_per_row_flip + * htotal * ref_freq_to_pix_freq) / meta_chunks_per_row_ub_c; + } + + disp_dlg_regs->dst_y_per_pte_row_nom_l = (unsigned int) ((double) dpte_row_height_l + / (double) vratio_l * dml_pow(2, 2)); + ASSERT(disp_dlg_regs->dst_y_per_pte_row_nom_l < (unsigned int) dml_pow(2, 17)); + + if (dual_plane) { + disp_dlg_regs->dst_y_per_pte_row_nom_c = (unsigned int) ((double) dpte_row_height_c + / (double) vratio_c * dml_pow(2, 2)); + if (disp_dlg_regs->dst_y_per_pte_row_nom_c >= (unsigned int) dml_pow(2, 17)) { + dml_print("DML_DLG: %s: Warning dst_y_per_pte_row_nom_c %u larger than supported by register format U15.2 %u\n", + __func__, + disp_dlg_regs->dst_y_per_pte_row_nom_c, + (unsigned int) dml_pow(2, 17) - 1); + } + } + + disp_dlg_regs->dst_y_per_meta_row_nom_l = (unsigned int) ((double) meta_row_height_l + / (double) vratio_l * dml_pow(2, 2)); + ASSERT(disp_dlg_regs->dst_y_per_meta_row_nom_l < (unsigned int) dml_pow(2, 17)); + + disp_dlg_regs->dst_y_per_meta_row_nom_c = disp_dlg_regs->dst_y_per_meta_row_nom_l; // TODO: dcc for 4:2:0 is not supported in dcn1.0. assigned to be the same as _l for now + + disp_dlg_regs->refcyc_per_pte_group_nom_l = (unsigned int) ((double) dpte_row_height_l + / (double) vratio_l * (double) htotal * ref_freq_to_pix_freq + / (double) dpte_groups_per_row_ub_l); + if (disp_dlg_regs->refcyc_per_pte_group_nom_l >= (unsigned int) dml_pow(2, 23)) + disp_dlg_regs->refcyc_per_pte_group_nom_l = dml_pow(2, 23) - 1; + disp_dlg_regs->refcyc_per_meta_chunk_nom_l = (unsigned int) ((double) meta_row_height_l + / (double) vratio_l * (double) htotal * ref_freq_to_pix_freq + / (double) meta_chunks_per_row_ub_l); + if (disp_dlg_regs->refcyc_per_meta_chunk_nom_l >= (unsigned int) dml_pow(2, 23)) + disp_dlg_regs->refcyc_per_meta_chunk_nom_l = dml_pow(2, 23) - 1; + + if (dual_plane) { + disp_dlg_regs->refcyc_per_pte_group_nom_c = + (unsigned int) ((double) dpte_row_height_c / (double) vratio_c + * (double) htotal * ref_freq_to_pix_freq + / (double) dpte_groups_per_row_ub_c); + if (disp_dlg_regs->refcyc_per_pte_group_nom_c >= (unsigned int) dml_pow(2, 23)) + disp_dlg_regs->refcyc_per_pte_group_nom_c = dml_pow(2, 23) - 1; + + // TODO: Is this the right calculation? Does htotal need to be halved? + disp_dlg_regs->refcyc_per_meta_chunk_nom_c = + (unsigned int) ((double) meta_row_height_c / (double) vratio_c + * (double) htotal * ref_freq_to_pix_freq + / (double) meta_chunks_per_row_ub_c); + if (disp_dlg_regs->refcyc_per_meta_chunk_nom_c >= (unsigned int) dml_pow(2, 23)) + disp_dlg_regs->refcyc_per_meta_chunk_nom_c = dml_pow(2, 23) - 1; + } + + disp_dlg_regs->refcyc_per_line_delivery_pre_l = (unsigned int) dml_floor(refcyc_per_line_delivery_pre_l, + 1); + disp_dlg_regs->refcyc_per_line_delivery_l = (unsigned int) dml_floor(refcyc_per_line_delivery_l, + 1); + ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_l < (unsigned int) dml_pow(2, 13)); + ASSERT(disp_dlg_regs->refcyc_per_line_delivery_l < (unsigned int) dml_pow(2, 13)); + + disp_dlg_regs->refcyc_per_line_delivery_pre_c = (unsigned int) dml_floor(refcyc_per_line_delivery_pre_c, + 1); + disp_dlg_regs->refcyc_per_line_delivery_c = (unsigned int) dml_floor(refcyc_per_line_delivery_c, + 1); + ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_c < (unsigned int) dml_pow(2, 13)); + ASSERT(disp_dlg_regs->refcyc_per_line_delivery_c < (unsigned int) dml_pow(2, 13)); + + disp_dlg_regs->chunk_hdl_adjust_cur0 = 3; + disp_dlg_regs->dst_y_offset_cur0 = 0; + disp_dlg_regs->chunk_hdl_adjust_cur1 = 3; + disp_dlg_regs->dst_y_offset_cur1 = 0; + + disp_dlg_regs->dst_y_delta_drq_limit = 0x7fff; // off + + disp_ttu_regs->refcyc_per_req_delivery_pre_l = (unsigned int) (refcyc_per_req_delivery_pre_l + * dml_pow(2, 10)); + disp_ttu_regs->refcyc_per_req_delivery_l = (unsigned int) (refcyc_per_req_delivery_l + * dml_pow(2, 10)); + disp_ttu_regs->refcyc_per_req_delivery_pre_c = (unsigned int) (refcyc_per_req_delivery_pre_c + * dml_pow(2, 10)); + disp_ttu_regs->refcyc_per_req_delivery_c = (unsigned int) (refcyc_per_req_delivery_c + * dml_pow(2, 10)); + disp_ttu_regs->refcyc_per_req_delivery_pre_cur0 = + (unsigned int) (refcyc_per_req_delivery_pre_cur0 * dml_pow(2, 10)); + disp_ttu_regs->refcyc_per_req_delivery_cur0 = (unsigned int) (refcyc_per_req_delivery_cur0 + * dml_pow(2, 10)); + disp_ttu_regs->refcyc_per_req_delivery_pre_cur1 = + (unsigned int) (refcyc_per_req_delivery_pre_cur1 * dml_pow(2, 10)); + disp_ttu_regs->refcyc_per_req_delivery_cur1 = (unsigned int) (refcyc_per_req_delivery_cur1 + * dml_pow(2, 10)); + disp_ttu_regs->qos_level_low_wm = 0; + ASSERT(disp_ttu_regs->qos_level_low_wm < dml_pow(2, 14)); + disp_ttu_regs->qos_level_high_wm = (unsigned int) (4.0 * (double) htotal + * ref_freq_to_pix_freq); + /*ASSERT(disp_ttu_regs->qos_level_high_wm < dml_pow(2, 14));*/ + + disp_ttu_regs->qos_level_flip = 14; + disp_ttu_regs->qos_level_fixed_l = 8; + disp_ttu_regs->qos_level_fixed_c = 8; + disp_ttu_regs->qos_level_fixed_cur0 = 8; + disp_ttu_regs->qos_ramp_disable_l = 0; + disp_ttu_regs->qos_ramp_disable_c = 0; + disp_ttu_regs->qos_ramp_disable_cur0 = 0; + + disp_ttu_regs->min_ttu_vblank = min_ttu_vblank * refclk_freq_in_mhz; + ASSERT(disp_ttu_regs->min_ttu_vblank < dml_pow(2, 24)); + + print__ttu_regs_st(mode_lib, disp_ttu_regs); + print__dlg_regs_st(mode_lib, disp_dlg_regs); +} + +void dml20_rq_dlg_get_dlg_reg(struct display_mode_lib *mode_lib, + display_dlg_regs_st *dlg_regs, + display_ttu_regs_st *ttu_regs, + const display_e2e_pipe_params_st *e2e_pipe_param, + const unsigned int num_pipes, + const unsigned int pipe_idx, + const bool cstate_en, + const bool pstate_en, + const bool vm_en, + const bool ignore_viewport_pos, + const bool immediate_flip_support) +{ + display_rq_params_st rq_param = {0}; + display_dlg_sys_params_st dlg_sys_param = {0}; + + // Get watermark and Tex. + dlg_sys_param.t_urg_wm_us = get_wm_urgent(mode_lib, e2e_pipe_param, num_pipes); + dlg_sys_param.deepsleep_dcfclk_mhz = get_clk_dcf_deepsleep(mode_lib, + e2e_pipe_param, + num_pipes); + dlg_sys_param.t_extra_us = get_urgent_extra_latency(mode_lib, e2e_pipe_param, num_pipes); + dlg_sys_param.mem_trip_us = get_wm_memory_trip(mode_lib, e2e_pipe_param, num_pipes); + dlg_sys_param.t_mclk_wm_us = get_wm_dram_clock_change(mode_lib, e2e_pipe_param, num_pipes); + dlg_sys_param.t_sr_wm_us = get_wm_stutter_enter_exit(mode_lib, e2e_pipe_param, num_pipes); + dlg_sys_param.total_flip_bw = get_total_immediate_flip_bw(mode_lib, + e2e_pipe_param, + num_pipes); + dlg_sys_param.total_flip_bytes = get_total_immediate_flip_bytes(mode_lib, + e2e_pipe_param, + num_pipes); + + print__dlg_sys_params_st(mode_lib, &dlg_sys_param); + + // system parameter calculation done + + dml_print("DML_DLG: Calculation for pipe[%d] start\n\n", pipe_idx); + dml20_rq_dlg_get_rq_params(mode_lib, &rq_param, &e2e_pipe_param[pipe_idx].pipe.src); + dml20_rq_dlg_get_dlg_params(mode_lib, + e2e_pipe_param, + num_pipes, + pipe_idx, + dlg_regs, + ttu_regs, + &rq_param.dlg, + &dlg_sys_param, + cstate_en, + pstate_en); + dml_print("DML_DLG: Calculation for pipe[%d] end\n", pipe_idx); +} + +static void calculate_ttu_cursor(struct display_mode_lib *mode_lib, + double *refcyc_per_req_delivery_pre_cur, + double *refcyc_per_req_delivery_cur, + double refclk_freq_in_mhz, + double ref_freq_to_pix_freq, + double hscale_pixel_rate_l, + double hscl_ratio, + double vratio_pre_l, + double vratio_l, + unsigned int cur_width, + enum cursor_bpp cur_bpp) +{ + unsigned int cur_src_width = cur_width; + unsigned int cur_req_size = 0; + unsigned int cur_req_width = 0; + double cur_width_ub = 0.0; + double cur_req_per_width = 0.0; + double hactive_cur = 0.0; + + ASSERT(cur_src_width <= 256); + + *refcyc_per_req_delivery_pre_cur = 0.0; + *refcyc_per_req_delivery_cur = 0.0; + if (cur_src_width > 0) { + unsigned int cur_bit_per_pixel = 0; + + if (cur_bpp == dm_cur_2bit) { + cur_req_size = 64; // byte + cur_bit_per_pixel = 2; + } else { // 32bit + cur_bit_per_pixel = 32; + if (cur_src_width >= 1 && cur_src_width <= 16) + cur_req_size = 64; + else if (cur_src_width >= 17 && cur_src_width <= 31) + cur_req_size = 128; + else + cur_req_size = 256; + } + + cur_req_width = (double) cur_req_size / ((double) cur_bit_per_pixel / 8.0); + cur_width_ub = dml_ceil((double) cur_src_width / (double) cur_req_width, 1) + * (double) cur_req_width; + cur_req_per_width = cur_width_ub / (double) cur_req_width; + hactive_cur = (double) cur_src_width / hscl_ratio; // TODO: oswin to think about what to do for cursor + + if (vratio_pre_l <= 1.0) { + *refcyc_per_req_delivery_pre_cur = hactive_cur * ref_freq_to_pix_freq + / (double) cur_req_per_width; + } else { + *refcyc_per_req_delivery_pre_cur = (double) refclk_freq_in_mhz + * (double) cur_src_width / hscale_pixel_rate_l + / (double) cur_req_per_width; + } + + ASSERT(*refcyc_per_req_delivery_pre_cur < dml_pow(2, 13)); + + if (vratio_l <= 1.0) { + *refcyc_per_req_delivery_cur = hactive_cur * ref_freq_to_pix_freq + / (double) cur_req_per_width; + } else { + *refcyc_per_req_delivery_cur = (double) refclk_freq_in_mhz + * (double) cur_src_width / hscale_pixel_rate_l + / (double) cur_req_per_width; + } + + dml_print("DML_DLG: %s: cur_req_width = %d\n", + __func__, + cur_req_width); + dml_print("DML_DLG: %s: cur_width_ub = %3.2f\n", + __func__, + cur_width_ub); + dml_print("DML_DLG: %s: cur_req_per_width = %3.2f\n", + __func__, + cur_req_per_width); + dml_print("DML_DLG: %s: hactive_cur = %3.2f\n", + __func__, + hactive_cur); + dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_cur = %3.2f\n", + __func__, + *refcyc_per_req_delivery_pre_cur); + dml_print("DML_DLG: %s: refcyc_per_req_delivery_cur = %3.2f\n", + __func__, + *refcyc_per_req_delivery_cur); + + ASSERT(*refcyc_per_req_delivery_cur < dml_pow(2, 13)); + } +} diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.h b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.h new file mode 100644 index 000000000..8b23867e9 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.h @@ -0,0 +1,73 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DML20_DISPLAY_RQ_DLG_CALC_H__ +#define __DML20_DISPLAY_RQ_DLG_CALC_H__ + +#include "../display_rq_dlg_helpers.h" + +struct display_mode_lib; + + +// Function: dml_rq_dlg_get_rq_reg +// Main entry point for test to get the register values out of this DML class. +// This function calls <get_rq_param> and <extract_rq_regs> functions to calculate +// and then populate the rq_regs struct +// Input: +// pipe_src_param - pipe source configuration (e.g. vp, pitch, etc.) +// Output: +// rq_regs - struct that holds all the RQ registers field value. +// See also: <display_rq_regs_st> +void dml20_rq_dlg_get_rq_reg( + struct display_mode_lib *mode_lib, + display_rq_regs_st *rq_regs, + const display_pipe_params_st *pipe_param); + + +// Function: dml_rq_dlg_get_dlg_reg +// Calculate and return DLG and TTU register struct given the system setting +// Output: +// dlg_regs - output DLG register struct +// ttu_regs - output DLG TTU register struct +// Input: +// e2e_pipe_param - "compacted" array of e2e pipe param struct +// num_pipes - num of active "pipe" or "route" +// pipe_idx - index that identifies the e2e_pipe_param that corresponding to this dlg +// cstate - 0: when calculate min_ttu_vblank it is assumed cstate is not required. 1: Normal mode, cstate is considered. +// Added for legacy or unrealistic timing tests. +void dml20_rq_dlg_get_dlg_reg( + struct display_mode_lib *mode_lib, + display_dlg_regs_st *dlg_regs, + display_ttu_regs_st *ttu_regs, + const display_e2e_pipe_params_st *e2e_pipe_param, + const unsigned int num_pipes, + const unsigned int pipe_idx, + const bool cstate_en, + const bool pstate_en, + const bool vm_en, + const bool ignore_viewport_pos, + const bool immediate_flip_support); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c new file mode 100644 index 000000000..0fc9f3e3f --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c @@ -0,0 +1,1686 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#include "../display_mode_lib.h" +#include "../display_mode_vba.h" +#include "display_rq_dlg_calc_20v2.h" + +// Function: dml20v2_rq_dlg_get_rq_params +// Calculate requestor related parameters that register definition agnostic +// (i.e. this layer does try to separate real values from register definition) +// Input: +// pipe_src_param - pipe source configuration (e.g. vp, pitch, etc.) +// Output: +// rq_param - values that can be used to setup RQ (e.g. swath_height, plane1_addr, etc.) +// +static void dml20v2_rq_dlg_get_rq_params( + struct display_mode_lib *mode_lib, + display_rq_params_st * rq_param, + const display_pipe_source_params_st *pipe_src_param); + +// Function: dml20v2_rq_dlg_get_dlg_params +// Calculate deadline related parameters +// +static void dml20v2_rq_dlg_get_dlg_params(struct display_mode_lib *mode_lib, + const display_e2e_pipe_params_st *e2e_pipe_param, + const unsigned int num_pipes, + const unsigned int pipe_idx, + display_dlg_regs_st *disp_dlg_regs, + display_ttu_regs_st *disp_ttu_regs, + const display_rq_dlg_params_st *rq_dlg_param, + const display_dlg_sys_params_st *dlg_sys_param, + const bool cstate_en, + const bool pstate_en); +/* + * NOTE: + * This file is gcc-parseable HW gospel, coming straight from HW engineers. + * + * It doesn't adhere to Linux kernel style and sometimes will do things in odd + * ways. Unless there is something clearly wrong with it the code should + * remain as-is as it provides us with a guarantee from HW that it is correct. + */ + +static void calculate_ttu_cursor(struct display_mode_lib *mode_lib, + double *refcyc_per_req_delivery_pre_cur, + double *refcyc_per_req_delivery_cur, + double refclk_freq_in_mhz, + double ref_freq_to_pix_freq, + double hscale_pixel_rate_l, + double hscl_ratio, + double vratio_pre_l, + double vratio_l, + unsigned int cur_width, + enum cursor_bpp cur_bpp); + +#include "../dml_inline_defs.h" + +static unsigned int get_bytes_per_element(enum source_format_class source_format, bool is_chroma) +{ + unsigned int ret_val = 0; + + if (source_format == dm_444_16) { + if (!is_chroma) + ret_val = 2; + } else if (source_format == dm_444_32) { + if (!is_chroma) + ret_val = 4; + } else if (source_format == dm_444_64) { + if (!is_chroma) + ret_val = 8; + } else if (source_format == dm_420_8) { + if (is_chroma) + ret_val = 2; + else + ret_val = 1; + } else if (source_format == dm_420_10) { + if (is_chroma) + ret_val = 4; + else + ret_val = 2; + } else if (source_format == dm_444_8) { + ret_val = 1; + } + return ret_val; +} + +static bool is_dual_plane(enum source_format_class source_format) +{ + bool ret_val = false; + + if ((source_format == dm_420_8) || (source_format == dm_420_10)) + ret_val = true; + + return ret_val; +} + +static double get_refcyc_per_delivery(struct display_mode_lib *mode_lib, + double refclk_freq_in_mhz, + double pclk_freq_in_mhz, + bool odm_combine, + unsigned int recout_width, + unsigned int hactive, + double vratio, + double hscale_pixel_rate, + unsigned int delivery_width, + unsigned int req_per_swath_ub) +{ + double refcyc_per_delivery = 0.0; + + if (vratio <= 1.0) { + if (odm_combine) + refcyc_per_delivery = (double) refclk_freq_in_mhz + * dml_min((double) recout_width, (double) hactive / 2.0) + / pclk_freq_in_mhz / (double) req_per_swath_ub; + else + refcyc_per_delivery = (double) refclk_freq_in_mhz * (double) recout_width + / pclk_freq_in_mhz / (double) req_per_swath_ub; + } else { + refcyc_per_delivery = (double) refclk_freq_in_mhz * (double) delivery_width + / (double) hscale_pixel_rate / (double) req_per_swath_ub; + } + + dml_print("DML_DLG: %s: refclk_freq_in_mhz = %3.2f\n", __func__, refclk_freq_in_mhz); + dml_print("DML_DLG: %s: pclk_freq_in_mhz = %3.2f\n", __func__, pclk_freq_in_mhz); + dml_print("DML_DLG: %s: recout_width = %d\n", __func__, recout_width); + dml_print("DML_DLG: %s: vratio = %3.2f\n", __func__, vratio); + dml_print("DML_DLG: %s: req_per_swath_ub = %d\n", __func__, req_per_swath_ub); + dml_print("DML_DLG: %s: refcyc_per_delivery= %3.2f\n", __func__, refcyc_per_delivery); + + return refcyc_per_delivery; + +} + +static unsigned int get_blk_size_bytes(const enum source_macro_tile_size tile_size) +{ + if (tile_size == dm_256k_tile) + return (256 * 1024); + else if (tile_size == dm_64k_tile) + return (64 * 1024); + else + return (4 * 1024); +} + +static void extract_rq_sizing_regs(struct display_mode_lib *mode_lib, + display_data_rq_regs_st *rq_regs, + const display_data_rq_sizing_params_st *rq_sizing) +{ + dml_print("DML_DLG: %s: rq_sizing param\n", __func__); + print__data_rq_sizing_params_st(mode_lib, rq_sizing); + + rq_regs->chunk_size = dml_log2(rq_sizing->chunk_bytes) - 10; + + if (rq_sizing->min_chunk_bytes == 0) + rq_regs->min_chunk_size = 0; + else + rq_regs->min_chunk_size = dml_log2(rq_sizing->min_chunk_bytes) - 8 + 1; + + rq_regs->meta_chunk_size = dml_log2(rq_sizing->meta_chunk_bytes) - 10; + if (rq_sizing->min_meta_chunk_bytes == 0) + rq_regs->min_meta_chunk_size = 0; + else + rq_regs->min_meta_chunk_size = dml_log2(rq_sizing->min_meta_chunk_bytes) - 6 + 1; + + rq_regs->dpte_group_size = dml_log2(rq_sizing->dpte_group_bytes) - 6; + rq_regs->mpte_group_size = dml_log2(rq_sizing->mpte_group_bytes) - 6; +} + +static void extract_rq_regs(struct display_mode_lib *mode_lib, + display_rq_regs_st *rq_regs, + const display_rq_params_st *rq_param) +{ + unsigned int detile_buf_size_in_bytes = mode_lib->ip.det_buffer_size_kbytes * 1024; + unsigned int detile_buf_plane1_addr = 0; + + extract_rq_sizing_regs(mode_lib, &(rq_regs->rq_regs_l), &rq_param->sizing.rq_l); + + rq_regs->rq_regs_l.pte_row_height_linear = dml_floor(dml_log2(rq_param->dlg.rq_l.dpte_row_height), + 1) - 3; + + if (rq_param->yuv420) { + extract_rq_sizing_regs(mode_lib, &(rq_regs->rq_regs_c), &rq_param->sizing.rq_c); + rq_regs->rq_regs_c.pte_row_height_linear = dml_floor(dml_log2(rq_param->dlg.rq_c.dpte_row_height), + 1) - 3; + } + + rq_regs->rq_regs_l.swath_height = dml_log2(rq_param->dlg.rq_l.swath_height); + rq_regs->rq_regs_c.swath_height = dml_log2(rq_param->dlg.rq_c.swath_height); + + // TODO: take the max between luma, chroma chunk size? + // okay for now, as we are setting chunk_bytes to 8kb anyways + if (rq_param->sizing.rq_l.chunk_bytes >= 32 * 1024) { //32kb + rq_regs->drq_expansion_mode = 0; + } else { + rq_regs->drq_expansion_mode = 2; + } + rq_regs->prq_expansion_mode = 1; + rq_regs->mrq_expansion_mode = 1; + rq_regs->crq_expansion_mode = 1; + + if (rq_param->yuv420) { + if ((double) rq_param->misc.rq_l.stored_swath_bytes + / (double) rq_param->misc.rq_c.stored_swath_bytes <= 1.5) { + detile_buf_plane1_addr = (detile_buf_size_in_bytes / 2.0 / 64.0); // half to chroma + } else { + detile_buf_plane1_addr = dml_round_to_multiple((unsigned int) ((2.0 * detile_buf_size_in_bytes) / 3.0), + 256, + 0) / 64.0; // 2/3 to chroma + } + } + rq_regs->plane1_base_address = detile_buf_plane1_addr; +} + +static void handle_det_buf_split(struct display_mode_lib *mode_lib, + display_rq_params_st *rq_param, + const display_pipe_source_params_st *pipe_src_param) +{ + unsigned int total_swath_bytes = 0; + unsigned int swath_bytes_l = 0; + unsigned int swath_bytes_c = 0; + unsigned int full_swath_bytes_packed_l = 0; + unsigned int full_swath_bytes_packed_c = 0; + bool req128_l = false; + bool req128_c = false; + bool surf_linear = (pipe_src_param->sw_mode == dm_sw_linear); + bool surf_vert = (pipe_src_param->source_scan == dm_vert); + unsigned int log2_swath_height_l = 0; + unsigned int log2_swath_height_c = 0; + unsigned int detile_buf_size_in_bytes = mode_lib->ip.det_buffer_size_kbytes * 1024; + + full_swath_bytes_packed_l = rq_param->misc.rq_l.full_swath_bytes; + full_swath_bytes_packed_c = rq_param->misc.rq_c.full_swath_bytes; + + if (rq_param->yuv420_10bpc) { + full_swath_bytes_packed_l = dml_round_to_multiple(rq_param->misc.rq_l.full_swath_bytes * 2 / 3, + 256, + 1) + 256; + full_swath_bytes_packed_c = dml_round_to_multiple(rq_param->misc.rq_c.full_swath_bytes * 2 / 3, + 256, + 1) + 256; + } + + if (rq_param->yuv420) { + total_swath_bytes = 2 * full_swath_bytes_packed_l + 2 * full_swath_bytes_packed_c; + + if (total_swath_bytes <= detile_buf_size_in_bytes) { //full 256b request + req128_l = false; + req128_c = false; + swath_bytes_l = full_swath_bytes_packed_l; + swath_bytes_c = full_swath_bytes_packed_c; + } else { //128b request (for luma only for yuv420 8bpc) + req128_l = true; + req128_c = false; + swath_bytes_l = full_swath_bytes_packed_l / 2; + swath_bytes_c = full_swath_bytes_packed_c; + } + // Note: assumption, the config that pass in will fit into + // the detiled buffer. + } else { + total_swath_bytes = 2 * full_swath_bytes_packed_l; + + if (total_swath_bytes <= detile_buf_size_in_bytes) + req128_l = false; + else + req128_l = true; + + swath_bytes_l = total_swath_bytes; + swath_bytes_c = 0; + } + rq_param->misc.rq_l.stored_swath_bytes = swath_bytes_l; + rq_param->misc.rq_c.stored_swath_bytes = swath_bytes_c; + + if (surf_linear) { + log2_swath_height_l = 0; + log2_swath_height_c = 0; + } else { + unsigned int swath_height_l; + unsigned int swath_height_c; + + if (!surf_vert) { + swath_height_l = rq_param->misc.rq_l.blk256_height; + swath_height_c = rq_param->misc.rq_c.blk256_height; + } else { + swath_height_l = rq_param->misc.rq_l.blk256_width; + swath_height_c = rq_param->misc.rq_c.blk256_width; + } + + if (swath_height_l > 0) + log2_swath_height_l = dml_log2(swath_height_l); + + if (req128_l && log2_swath_height_l > 0) + log2_swath_height_l -= 1; + + if (swath_height_c > 0) + log2_swath_height_c = dml_log2(swath_height_c); + + if (req128_c && log2_swath_height_c > 0) + log2_swath_height_c -= 1; + } + + rq_param->dlg.rq_l.swath_height = 1 << log2_swath_height_l; + rq_param->dlg.rq_c.swath_height = 1 << log2_swath_height_c; + + dml_print("DML_DLG: %s: req128_l = %0d\n", __func__, req128_l); + dml_print("DML_DLG: %s: req128_c = %0d\n", __func__, req128_c); + dml_print("DML_DLG: %s: full_swath_bytes_packed_l = %0d\n", + __func__, + full_swath_bytes_packed_l); + dml_print("DML_DLG: %s: full_swath_bytes_packed_c = %0d\n", + __func__, + full_swath_bytes_packed_c); +} + +static void get_meta_and_pte_attr(struct display_mode_lib *mode_lib, + display_data_rq_dlg_params_st *rq_dlg_param, + display_data_rq_misc_params_st *rq_misc_param, + display_data_rq_sizing_params_st *rq_sizing_param, + unsigned int vp_width, + unsigned int vp_height, + unsigned int data_pitch, + unsigned int meta_pitch, + unsigned int source_format, + unsigned int tiling, + unsigned int macro_tile_size, + unsigned int source_scan, + unsigned int is_chroma) +{ + bool surf_linear = (tiling == dm_sw_linear); + bool surf_vert = (source_scan == dm_vert); + + unsigned int bytes_per_element; + unsigned int bytes_per_element_y = get_bytes_per_element((enum source_format_class)(source_format), + false); + unsigned int bytes_per_element_c = get_bytes_per_element((enum source_format_class)(source_format), + true); + + unsigned int blk256_width = 0; + unsigned int blk256_height = 0; + + unsigned int blk256_width_y = 0; + unsigned int blk256_height_y = 0; + unsigned int blk256_width_c = 0; + unsigned int blk256_height_c = 0; + unsigned int log2_bytes_per_element; + unsigned int log2_blk256_width; + unsigned int log2_blk256_height; + unsigned int blk_bytes; + unsigned int log2_blk_bytes; + unsigned int log2_blk_height; + unsigned int log2_blk_width; + unsigned int log2_meta_req_bytes; + unsigned int log2_meta_req_height; + unsigned int log2_meta_req_width; + unsigned int meta_req_width; + unsigned int meta_req_height; + unsigned int log2_meta_row_height; + unsigned int meta_row_width_ub; + unsigned int log2_meta_chunk_bytes; + unsigned int log2_meta_chunk_height; + + //full sized meta chunk width in unit of data elements + unsigned int log2_meta_chunk_width; + unsigned int log2_min_meta_chunk_bytes; + unsigned int min_meta_chunk_width; + unsigned int meta_chunk_width; + unsigned int meta_chunk_per_row_int; + unsigned int meta_row_remainder; + unsigned int meta_chunk_threshold; + unsigned int meta_blk_bytes; + unsigned int meta_blk_height; + unsigned int meta_blk_width; + unsigned int meta_surface_bytes; + unsigned int vmpg_bytes; + unsigned int meta_pte_req_per_frame_ub; + unsigned int meta_pte_bytes_per_frame_ub; + const unsigned int log2_vmpg_bytes = dml_log2(mode_lib->soc.vmm_page_size_bytes); + const unsigned int dpte_buf_in_pte_reqs = mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma; + const unsigned int pde_proc_buffer_size_64k_reqs = + mode_lib->ip.pde_proc_buffer_size_64k_reqs; + + unsigned int log2_vmpg_height = 0; + unsigned int log2_vmpg_width = 0; + unsigned int log2_dpte_req_height_ptes = 0; + unsigned int log2_dpte_req_height = 0; + unsigned int log2_dpte_req_width = 0; + unsigned int log2_dpte_row_height_linear = 0; + unsigned int log2_dpte_row_height = 0; + unsigned int log2_dpte_group_width = 0; + unsigned int dpte_row_width_ub = 0; + unsigned int dpte_req_height = 0; + unsigned int dpte_req_width = 0; + unsigned int dpte_group_width = 0; + unsigned int log2_dpte_group_bytes = 0; + unsigned int log2_dpte_group_length = 0; + unsigned int pde_buf_entries; + bool yuv420 = (source_format == dm_420_8 || source_format == dm_420_10); + + Calculate256BBlockSizes((enum source_format_class)(source_format), + (enum dm_swizzle_mode)(tiling), + bytes_per_element_y, + bytes_per_element_c, + &blk256_height_y, + &blk256_height_c, + &blk256_width_y, + &blk256_width_c); + + if (!is_chroma) { + blk256_width = blk256_width_y; + blk256_height = blk256_height_y; + bytes_per_element = bytes_per_element_y; + } else { + blk256_width = blk256_width_c; + blk256_height = blk256_height_c; + bytes_per_element = bytes_per_element_c; + } + + log2_bytes_per_element = dml_log2(bytes_per_element); + + dml_print("DML_DLG: %s: surf_linear = %d\n", __func__, surf_linear); + dml_print("DML_DLG: %s: surf_vert = %d\n", __func__, surf_vert); + dml_print("DML_DLG: %s: blk256_width = %d\n", __func__, blk256_width); + dml_print("DML_DLG: %s: blk256_height = %d\n", __func__, blk256_height); + + log2_blk256_width = dml_log2((double) blk256_width); + log2_blk256_height = dml_log2((double) blk256_height); + blk_bytes = surf_linear ? + 256 : get_blk_size_bytes((enum source_macro_tile_size) macro_tile_size); + log2_blk_bytes = dml_log2((double) blk_bytes); + log2_blk_height = 0; + log2_blk_width = 0; + + // remember log rule + // "+" in log is multiply + // "-" in log is divide + // "/2" is like square root + // blk is vertical biased + if (tiling != dm_sw_linear) + log2_blk_height = log2_blk256_height + + dml_ceil((double) (log2_blk_bytes - 8) / 2.0, 1); + else + log2_blk_height = 0; // blk height of 1 + + log2_blk_width = log2_blk_bytes - log2_bytes_per_element - log2_blk_height; + + if (!surf_vert) { + rq_dlg_param->swath_width_ub = dml_round_to_multiple(vp_width - 1, blk256_width, 1) + + blk256_width; + rq_dlg_param->req_per_swath_ub = rq_dlg_param->swath_width_ub >> log2_blk256_width; + } else { + rq_dlg_param->swath_width_ub = dml_round_to_multiple(vp_height - 1, blk256_height, 1) + + blk256_height; + rq_dlg_param->req_per_swath_ub = rq_dlg_param->swath_width_ub >> log2_blk256_height; + } + + if (!surf_vert) + rq_misc_param->full_swath_bytes = rq_dlg_param->swath_width_ub * blk256_height + * bytes_per_element; + else + rq_misc_param->full_swath_bytes = rq_dlg_param->swath_width_ub * blk256_width + * bytes_per_element; + + rq_misc_param->blk256_height = blk256_height; + rq_misc_param->blk256_width = blk256_width; + + // ------- + // meta + // ------- + log2_meta_req_bytes = 6; // meta request is 64b and is 8x8byte meta element + + // each 64b meta request for dcn is 8x8 meta elements and + // a meta element covers one 256b block of the data surface. + log2_meta_req_height = log2_blk256_height + 3; // meta req is 8x8 byte, each byte represent 1 blk256 + log2_meta_req_width = log2_meta_req_bytes + 8 - log2_bytes_per_element + - log2_meta_req_height; + meta_req_width = 1 << log2_meta_req_width; + meta_req_height = 1 << log2_meta_req_height; + log2_meta_row_height = 0; + meta_row_width_ub = 0; + + // the dimensions of a meta row are meta_row_width x meta_row_height in elements. + // calculate upper bound of the meta_row_width + if (!surf_vert) { + log2_meta_row_height = log2_meta_req_height; + meta_row_width_ub = dml_round_to_multiple(vp_width - 1, meta_req_width, 1) + + meta_req_width; + rq_dlg_param->meta_req_per_row_ub = meta_row_width_ub / meta_req_width; + } else { + log2_meta_row_height = log2_meta_req_width; + meta_row_width_ub = dml_round_to_multiple(vp_height - 1, meta_req_height, 1) + + meta_req_height; + rq_dlg_param->meta_req_per_row_ub = meta_row_width_ub / meta_req_height; + } + rq_dlg_param->meta_bytes_per_row_ub = rq_dlg_param->meta_req_per_row_ub * 64; + + rq_dlg_param->meta_row_height = 1 << log2_meta_row_height; + + log2_meta_chunk_bytes = dml_log2(rq_sizing_param->meta_chunk_bytes); + log2_meta_chunk_height = log2_meta_row_height; + + //full sized meta chunk width in unit of data elements + log2_meta_chunk_width = log2_meta_chunk_bytes + 8 - log2_bytes_per_element + - log2_meta_chunk_height; + log2_min_meta_chunk_bytes = dml_log2(rq_sizing_param->min_meta_chunk_bytes); + min_meta_chunk_width = 1 + << (log2_min_meta_chunk_bytes + 8 - log2_bytes_per_element + - log2_meta_chunk_height); + meta_chunk_width = 1 << log2_meta_chunk_width; + meta_chunk_per_row_int = (unsigned int) (meta_row_width_ub / meta_chunk_width); + meta_row_remainder = meta_row_width_ub % meta_chunk_width; + meta_chunk_threshold = 0; + meta_blk_bytes = 4096; + meta_blk_height = blk256_height * 64; + meta_blk_width = meta_blk_bytes * 256 / bytes_per_element / meta_blk_height; + meta_surface_bytes = meta_pitch + * (dml_round_to_multiple(vp_height - 1, meta_blk_height, 1) + meta_blk_height) + * bytes_per_element / 256; + vmpg_bytes = mode_lib->soc.vmm_page_size_bytes; + meta_pte_req_per_frame_ub = (dml_round_to_multiple(meta_surface_bytes - vmpg_bytes, + 8 * vmpg_bytes, + 1) + 8 * vmpg_bytes) / (8 * vmpg_bytes); + meta_pte_bytes_per_frame_ub = meta_pte_req_per_frame_ub * 64; //64B mpte request + rq_dlg_param->meta_pte_bytes_per_frame_ub = meta_pte_bytes_per_frame_ub; + + dml_print("DML_DLG: %s: meta_blk_height = %d\n", __func__, meta_blk_height); + dml_print("DML_DLG: %s: meta_blk_width = %d\n", __func__, meta_blk_width); + dml_print("DML_DLG: %s: meta_surface_bytes = %d\n", __func__, meta_surface_bytes); + dml_print("DML_DLG: %s: meta_pte_req_per_frame_ub = %d\n", + __func__, + meta_pte_req_per_frame_ub); + dml_print("DML_DLG: %s: meta_pte_bytes_per_frame_ub = %d\n", + __func__, + meta_pte_bytes_per_frame_ub); + + if (!surf_vert) + meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width; + else + meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height; + + if (meta_row_remainder <= meta_chunk_threshold) + rq_dlg_param->meta_chunks_per_row_ub = meta_chunk_per_row_int + 1; + else + rq_dlg_param->meta_chunks_per_row_ub = meta_chunk_per_row_int + 2; + + // ------ + // dpte + // ------ + if (surf_linear) { + log2_vmpg_height = 0; // one line high + } else { + log2_vmpg_height = (log2_vmpg_bytes - 8) / 2 + log2_blk256_height; + } + log2_vmpg_width = log2_vmpg_bytes - log2_bytes_per_element - log2_vmpg_height; + + // only 3 possible shapes for dpte request in dimensions of ptes: 8x1, 4x2, 2x4. + if (surf_linear) { //one 64B PTE request returns 8 PTEs + log2_dpte_req_height_ptes = 0; + log2_dpte_req_width = log2_vmpg_width + 3; + log2_dpte_req_height = 0; + } else if (log2_blk_bytes == 12) { //4KB tile means 4kB page size + //one 64B req gives 8x1 PTEs for 4KB tile + log2_dpte_req_height_ptes = 0; + log2_dpte_req_width = log2_blk_width + 3; + log2_dpte_req_height = log2_blk_height + 0; + } else if ((log2_blk_bytes >= 16) && (log2_vmpg_bytes == 12)) { // tile block >= 64KB + //two 64B reqs of 2x4 PTEs give 16 PTEs to cover 64KB + log2_dpte_req_height_ptes = 4; + log2_dpte_req_width = log2_blk256_width + 4; // log2_64KB_width + log2_dpte_req_height = log2_blk256_height + 4; // log2_64KB_height + } else { //64KB page size and must 64KB tile block + //one 64B req gives 8x1 PTEs for 64KB tile + log2_dpte_req_height_ptes = 0; + log2_dpte_req_width = log2_blk_width + 3; + log2_dpte_req_height = log2_blk_height + 0; + } + + // The dpte request dimensions in data elements is dpte_req_width x dpte_req_height + // log2_vmpg_width is how much 1 pte represent, now calculating how much a 64b pte req represent + // That depends on the pte shape (i.e. 8x1, 4x2, 2x4) + //log2_dpte_req_height = log2_vmpg_height + log2_dpte_req_height_ptes; + //log2_dpte_req_width = log2_vmpg_width + log2_dpte_req_width_ptes; + dpte_req_height = 1 << log2_dpte_req_height; + dpte_req_width = 1 << log2_dpte_req_width; + + // calculate pitch dpte row buffer can hold + // round the result down to a power of two. + pde_buf_entries = yuv420 ? (pde_proc_buffer_size_64k_reqs >> 1) : pde_proc_buffer_size_64k_reqs; + if (surf_linear) { + unsigned int dpte_row_height; + + log2_dpte_row_height_linear = dml_floor(dml_log2(dml_min(64 * 1024 * pde_buf_entries + / bytes_per_element, + dpte_buf_in_pte_reqs + * dpte_req_width) + / data_pitch), + 1); + + ASSERT(log2_dpte_row_height_linear >= 3); + + if (log2_dpte_row_height_linear > 7) + log2_dpte_row_height_linear = 7; + + log2_dpte_row_height = log2_dpte_row_height_linear; + // For linear, the dpte row is pitch dependent and the pte requests wrap at the pitch boundary. + // the dpte_row_width_ub is the upper bound of data_pitch*dpte_row_height in elements with this unique buffering. + dpte_row_height = 1 << log2_dpte_row_height; + dpte_row_width_ub = dml_round_to_multiple(data_pitch * dpte_row_height - 1, + dpte_req_width, + 1) + dpte_req_width; + rq_dlg_param->dpte_req_per_row_ub = dpte_row_width_ub / dpte_req_width; + } else { + // the upper bound of the dpte_row_width without dependency on viewport position follows. + // for tiled mode, row height is the same as req height and row store up to vp size upper bound + if (!surf_vert) { + log2_dpte_row_height = log2_dpte_req_height; + dpte_row_width_ub = dml_round_to_multiple(vp_width - 1, dpte_req_width, 1) + + dpte_req_width; + rq_dlg_param->dpte_req_per_row_ub = dpte_row_width_ub / dpte_req_width; + } else { + log2_dpte_row_height = + (log2_blk_width < log2_dpte_req_width) ? + log2_blk_width : log2_dpte_req_width; + dpte_row_width_ub = dml_round_to_multiple(vp_height - 1, dpte_req_height, 1) + + dpte_req_height; + rq_dlg_param->dpte_req_per_row_ub = dpte_row_width_ub / dpte_req_height; + } + } + if (log2_blk_bytes >= 16 && log2_vmpg_bytes == 12) // tile block >= 64KB + rq_dlg_param->dpte_bytes_per_row_ub = rq_dlg_param->dpte_req_per_row_ub * 128; //2*64B dpte request + else + rq_dlg_param->dpte_bytes_per_row_ub = rq_dlg_param->dpte_req_per_row_ub * 64; //64B dpte request + + rq_dlg_param->dpte_row_height = 1 << log2_dpte_row_height; + + // the dpte_group_bytes is reduced for the specific case of vertical + // access of a tile surface that has dpte request of 8x1 ptes. + if (!surf_linear & (log2_dpte_req_height_ptes == 0) & surf_vert) //reduced, in this case, will have page fault within a group + rq_sizing_param->dpte_group_bytes = 512; + else + //full size + rq_sizing_param->dpte_group_bytes = 2048; + + //since pte request size is 64byte, the number of data pte requests per full sized group is as follows. + log2_dpte_group_bytes = dml_log2(rq_sizing_param->dpte_group_bytes); + log2_dpte_group_length = log2_dpte_group_bytes - 6; //length in 64b requests + + // full sized data pte group width in elements + if (!surf_vert) + log2_dpte_group_width = log2_dpte_group_length + log2_dpte_req_width; + else + log2_dpte_group_width = log2_dpte_group_length + log2_dpte_req_height; + + //But if the tile block >=64KB and the page size is 4KB, then each dPTE request is 2*64B + if ((log2_blk_bytes >= 16) && (log2_vmpg_bytes == 12)) // tile block >= 64KB + log2_dpte_group_width = log2_dpte_group_width - 1; + + dpte_group_width = 1 << log2_dpte_group_width; + + // since dpte groups are only aligned to dpte_req_width and not dpte_group_width, + // the upper bound for the dpte groups per row is as follows. + rq_dlg_param->dpte_groups_per_row_ub = dml_ceil((double) dpte_row_width_ub / dpte_group_width, + 1); +} + +static void get_surf_rq_param(struct display_mode_lib *mode_lib, + display_data_rq_sizing_params_st *rq_sizing_param, + display_data_rq_dlg_params_st *rq_dlg_param, + display_data_rq_misc_params_st *rq_misc_param, + const display_pipe_source_params_st *pipe_src_param, + bool is_chroma) +{ + bool mode_422 = false; + unsigned int vp_width = 0; + unsigned int vp_height = 0; + unsigned int data_pitch = 0; + unsigned int meta_pitch = 0; + unsigned int ppe = mode_422 ? 2 : 1; + + // TODO check if ppe apply for both luma and chroma in 422 case + if (is_chroma) { + vp_width = pipe_src_param->viewport_width_c / ppe; + vp_height = pipe_src_param->viewport_height_c; + data_pitch = pipe_src_param->data_pitch_c; + meta_pitch = pipe_src_param->meta_pitch_c; + } else { + vp_width = pipe_src_param->viewport_width / ppe; + vp_height = pipe_src_param->viewport_height; + data_pitch = pipe_src_param->data_pitch; + meta_pitch = pipe_src_param->meta_pitch; + } + + rq_sizing_param->chunk_bytes = 8192; + + if (rq_sizing_param->chunk_bytes == 64 * 1024) + rq_sizing_param->min_chunk_bytes = 0; + else + rq_sizing_param->min_chunk_bytes = 1024; + + rq_sizing_param->meta_chunk_bytes = 2048; + rq_sizing_param->min_meta_chunk_bytes = 256; + + rq_sizing_param->mpte_group_bytes = 2048; + + get_meta_and_pte_attr(mode_lib, + rq_dlg_param, + rq_misc_param, + rq_sizing_param, + vp_width, + vp_height, + data_pitch, + meta_pitch, + pipe_src_param->source_format, + pipe_src_param->sw_mode, + pipe_src_param->macro_tile_size, + pipe_src_param->source_scan, + is_chroma); +} + +static void dml20v2_rq_dlg_get_rq_params(struct display_mode_lib *mode_lib, + display_rq_params_st *rq_param, + const display_pipe_source_params_st *pipe_src_param) +{ + // get param for luma surface + rq_param->yuv420 = pipe_src_param->source_format == dm_420_8 + || pipe_src_param->source_format == dm_420_10; + rq_param->yuv420_10bpc = pipe_src_param->source_format == dm_420_10; + + get_surf_rq_param(mode_lib, + &(rq_param->sizing.rq_l), + &(rq_param->dlg.rq_l), + &(rq_param->misc.rq_l), + pipe_src_param, + 0); + + if (is_dual_plane((enum source_format_class)(pipe_src_param->source_format))) { + // get param for chroma surface + get_surf_rq_param(mode_lib, + &(rq_param->sizing.rq_c), + &(rq_param->dlg.rq_c), + &(rq_param->misc.rq_c), + pipe_src_param, + 1); + } + + // calculate how to split the det buffer space between luma and chroma + handle_det_buf_split(mode_lib, rq_param, pipe_src_param); + print__rq_params_st(mode_lib, rq_param); +} + +void dml20v2_rq_dlg_get_rq_reg(struct display_mode_lib *mode_lib, + display_rq_regs_st *rq_regs, + const display_pipe_params_st *pipe_param) +{ + display_rq_params_st rq_param = {0}; + + memset(rq_regs, 0, sizeof(*rq_regs)); + dml20v2_rq_dlg_get_rq_params(mode_lib, &rq_param, &pipe_param->src); + extract_rq_regs(mode_lib, rq_regs, &rq_param); + + print__rq_regs_st(mode_lib, rq_regs); +} + +// Note: currently taken in as is. +// Nice to decouple code from hw register implement and extract code that are repeated for luma and chroma. +static void dml20v2_rq_dlg_get_dlg_params(struct display_mode_lib *mode_lib, + const display_e2e_pipe_params_st *e2e_pipe_param, + const unsigned int num_pipes, + const unsigned int pipe_idx, + display_dlg_regs_st *disp_dlg_regs, + display_ttu_regs_st *disp_ttu_regs, + const display_rq_dlg_params_st *rq_dlg_param, + const display_dlg_sys_params_st *dlg_sys_param, + const bool cstate_en, + const bool pstate_en) +{ + const display_pipe_source_params_st *src = &e2e_pipe_param[pipe_idx].pipe.src; + const display_pipe_dest_params_st *dst = &e2e_pipe_param[pipe_idx].pipe.dest; + const display_output_params_st *dout = &e2e_pipe_param[pipe_idx].dout; + const display_clocks_and_cfg_st *clks = &e2e_pipe_param[pipe_idx].clks_cfg; + const scaler_ratio_depth_st *scl = &e2e_pipe_param[pipe_idx].pipe.scale_ratio_depth; + const scaler_taps_st *taps = &e2e_pipe_param[pipe_idx].pipe.scale_taps; + + // ------------------------- + // Section 1.15.2.1: OTG dependent Params + // ------------------------- + // Timing + unsigned int htotal = dst->htotal; +// unsigned int hblank_start = dst.hblank_start; // TODO: Remove + unsigned int hblank_end = dst->hblank_end; + unsigned int vblank_start = dst->vblank_start; + unsigned int vblank_end = dst->vblank_end; + unsigned int min_vblank = mode_lib->ip.min_vblank_lines; + + double dppclk_freq_in_mhz = clks->dppclk_mhz; + double dispclk_freq_in_mhz = clks->dispclk_mhz; + double refclk_freq_in_mhz = clks->refclk_mhz; + double pclk_freq_in_mhz = dst->pixel_rate_mhz; + bool interlaced = dst->interlaced; + + double ref_freq_to_pix_freq = refclk_freq_in_mhz / pclk_freq_in_mhz; + + double min_dcfclk_mhz; + double t_calc_us; + double min_ttu_vblank; + + double min_dst_y_ttu_vblank; + unsigned int dlg_vblank_start; + bool dual_plane; + bool mode_422; + unsigned int access_dir; + unsigned int vp_height_l; + unsigned int vp_width_l; + unsigned int vp_height_c; + unsigned int vp_width_c; + + // Scaling + unsigned int htaps_l; + unsigned int htaps_c; + double hratio_l; + double hratio_c; + double vratio_l; + double vratio_c; + bool scl_enable; + + double line_time_in_us; + // double vinit_l; + // double vinit_c; + // double vinit_bot_l; + // double vinit_bot_c; + + // unsigned int swath_height_l; + unsigned int swath_width_ub_l; + // unsigned int dpte_bytes_per_row_ub_l; + unsigned int dpte_groups_per_row_ub_l; + // unsigned int meta_pte_bytes_per_frame_ub_l; + // unsigned int meta_bytes_per_row_ub_l; + + // unsigned int swath_height_c; + unsigned int swath_width_ub_c; + // unsigned int dpte_bytes_per_row_ub_c; + unsigned int dpte_groups_per_row_ub_c; + + unsigned int meta_chunks_per_row_ub_l; + unsigned int meta_chunks_per_row_ub_c; + unsigned int vupdate_offset; + unsigned int vupdate_width; + unsigned int vready_offset; + + unsigned int dppclk_delay_subtotal; + unsigned int dispclk_delay_subtotal; + unsigned int pixel_rate_delay_subtotal; + + unsigned int vstartup_start; + unsigned int dst_x_after_scaler; + unsigned int dst_y_after_scaler; + double line_wait; + double dst_y_prefetch; + double dst_y_per_vm_vblank; + double dst_y_per_row_vblank; + double dst_y_per_vm_flip; + double dst_y_per_row_flip; + double min_dst_y_per_vm_vblank; + double min_dst_y_per_row_vblank; + double lsw; + double vratio_pre_l; + double vratio_pre_c; + unsigned int req_per_swath_ub_l; + unsigned int req_per_swath_ub_c; + unsigned int meta_row_height_l; + unsigned int meta_row_height_c; + unsigned int swath_width_pixels_ub_l; + unsigned int swath_width_pixels_ub_c; + unsigned int scaler_rec_in_width_l; + unsigned int scaler_rec_in_width_c; + unsigned int dpte_row_height_l; + unsigned int dpte_row_height_c; + double hscale_pixel_rate_l; + double hscale_pixel_rate_c; + double min_hratio_fact_l; + double min_hratio_fact_c; + double refcyc_per_line_delivery_pre_l; + double refcyc_per_line_delivery_pre_c; + double refcyc_per_line_delivery_l; + double refcyc_per_line_delivery_c; + + double refcyc_per_req_delivery_pre_l; + double refcyc_per_req_delivery_pre_c; + double refcyc_per_req_delivery_l; + double refcyc_per_req_delivery_c; + + unsigned int full_recout_width; + double refcyc_per_req_delivery_pre_cur0; + double refcyc_per_req_delivery_cur0; + double refcyc_per_req_delivery_pre_cur1; + double refcyc_per_req_delivery_cur1; + + memset(disp_dlg_regs, 0, sizeof(*disp_dlg_regs)); + memset(disp_ttu_regs, 0, sizeof(*disp_ttu_regs)); + + dml_print("DML_DLG: %s: cstate_en = %d\n", __func__, cstate_en); + dml_print("DML_DLG: %s: pstate_en = %d\n", __func__, pstate_en); + + dml_print("DML_DLG: %s: dppclk_freq_in_mhz = %3.2f\n", __func__, dppclk_freq_in_mhz); + dml_print("DML_DLG: %s: dispclk_freq_in_mhz = %3.2f\n", __func__, dispclk_freq_in_mhz); + dml_print("DML_DLG: %s: refclk_freq_in_mhz = %3.2f\n", __func__, refclk_freq_in_mhz); + dml_print("DML_DLG: %s: pclk_freq_in_mhz = %3.2f\n", __func__, pclk_freq_in_mhz); + dml_print("DML_DLG: %s: interlaced = %d\n", __func__, interlaced); + ASSERT(ref_freq_to_pix_freq < 4.0); + + disp_dlg_regs->ref_freq_to_pix_freq = + (unsigned int) (ref_freq_to_pix_freq * dml_pow(2, 19)); + disp_dlg_regs->refcyc_per_htotal = (unsigned int) (ref_freq_to_pix_freq * (double) htotal + * dml_pow(2, 8)); + disp_dlg_regs->dlg_vblank_end = interlaced ? (vblank_end / 2) : vblank_end; // 15 bits + disp_dlg_regs->refcyc_h_blank_end = (unsigned int) ((double) hblank_end + * (double) ref_freq_to_pix_freq); + ASSERT(disp_dlg_regs->refcyc_h_blank_end < (unsigned int) dml_pow(2, 13)); + + min_dcfclk_mhz = dlg_sys_param->deepsleep_dcfclk_mhz; + t_calc_us = get_tcalc(mode_lib, e2e_pipe_param, num_pipes); + min_ttu_vblank = get_min_ttu_vblank(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); + + min_dst_y_ttu_vblank = min_ttu_vblank * pclk_freq_in_mhz / (double) htotal; + dlg_vblank_start = interlaced ? (vblank_start / 2) : vblank_start; + + disp_dlg_regs->min_dst_y_next_start = (unsigned int) (((double) dlg_vblank_start + + min_dst_y_ttu_vblank) * dml_pow(2, 2)); + ASSERT(disp_dlg_regs->min_dst_y_next_start < (unsigned int) dml_pow(2, 18)); + + dml_print("DML_DLG: %s: min_dcfclk_mhz = %3.2f\n", + __func__, + min_dcfclk_mhz); + dml_print("DML_DLG: %s: min_ttu_vblank = %3.2f\n", + __func__, + min_ttu_vblank); + dml_print("DML_DLG: %s: min_dst_y_ttu_vblank = %3.2f\n", + __func__, + min_dst_y_ttu_vblank); + dml_print("DML_DLG: %s: t_calc_us = %3.2f\n", + __func__, + t_calc_us); + dml_print("DML_DLG: %s: disp_dlg_regs->min_dst_y_next_start = 0x%0x\n", + __func__, + disp_dlg_regs->min_dst_y_next_start); + dml_print("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", + __func__, + ref_freq_to_pix_freq); + + // ------------------------- + // Section 1.15.2.2: Prefetch, Active and TTU + // ------------------------- + // Prefetch Calc + // Source +// dcc_en = src.dcc; + dual_plane = is_dual_plane((enum source_format_class)(src->source_format)); + mode_422 = false; // TODO + access_dir = (src->source_scan == dm_vert); // vp access direction: horizontal or vertical accessed +// bytes_per_element_l = get_bytes_per_element(source_format_class(src.source_format), 0); +// bytes_per_element_c = get_bytes_per_element(source_format_class(src.source_format), 1); + vp_height_l = src->viewport_height; + vp_width_l = src->viewport_width; + vp_height_c = src->viewport_height_c; + vp_width_c = src->viewport_width_c; + + // Scaling + htaps_l = taps->htaps; + htaps_c = taps->htaps_c; + hratio_l = scl->hscl_ratio; + hratio_c = scl->hscl_ratio_c; + vratio_l = scl->vscl_ratio; + vratio_c = scl->vscl_ratio_c; + scl_enable = scl->scl_enable; + + line_time_in_us = (htotal / pclk_freq_in_mhz); +// vinit_l = scl.vinit; +// vinit_c = scl.vinit_c; +// vinit_bot_l = scl.vinit_bot; +// vinit_bot_c = scl.vinit_bot_c; + +// unsigned int swath_height_l = rq_dlg_param->rq_l.swath_height; + swath_width_ub_l = rq_dlg_param->rq_l.swath_width_ub; +// unsigned int dpte_bytes_per_row_ub_l = rq_dlg_param->rq_l.dpte_bytes_per_row_ub; + dpte_groups_per_row_ub_l = rq_dlg_param->rq_l.dpte_groups_per_row_ub; +// unsigned int meta_pte_bytes_per_frame_ub_l = rq_dlg_param->rq_l.meta_pte_bytes_per_frame_ub; +// unsigned int meta_bytes_per_row_ub_l = rq_dlg_param->rq_l.meta_bytes_per_row_ub; + +// unsigned int swath_height_c = rq_dlg_param->rq_c.swath_height; + swath_width_ub_c = rq_dlg_param->rq_c.swath_width_ub; + // dpte_bytes_per_row_ub_c = rq_dlg_param->rq_c.dpte_bytes_per_row_ub; + dpte_groups_per_row_ub_c = rq_dlg_param->rq_c.dpte_groups_per_row_ub; + + meta_chunks_per_row_ub_l = rq_dlg_param->rq_l.meta_chunks_per_row_ub; + meta_chunks_per_row_ub_c = rq_dlg_param->rq_c.meta_chunks_per_row_ub; + vupdate_offset = dst->vupdate_offset; + vupdate_width = dst->vupdate_width; + vready_offset = dst->vready_offset; + + dppclk_delay_subtotal = mode_lib->ip.dppclk_delay_subtotal; + dispclk_delay_subtotal = mode_lib->ip.dispclk_delay_subtotal; + + if (scl_enable) + dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_scl; + else + dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_scl_lb_only; + + dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_cnvc_formatter + + src->num_cursors * mode_lib->ip.dppclk_delay_cnvc_cursor; + + if (dout->dsc_enable) { + double dsc_delay = get_dsc_delay(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); + + dispclk_delay_subtotal += dsc_delay; + } + + pixel_rate_delay_subtotal = dppclk_delay_subtotal * pclk_freq_in_mhz / dppclk_freq_in_mhz + + dispclk_delay_subtotal * pclk_freq_in_mhz / dispclk_freq_in_mhz; + + vstartup_start = dst->vstartup_start; + if (interlaced) { + if (vstartup_start / 2.0 + - (double) (vready_offset + vupdate_width + vupdate_offset) / htotal + <= vblank_end / 2.0) + disp_dlg_regs->vready_after_vcount0 = 1; + else + disp_dlg_regs->vready_after_vcount0 = 0; + } else { + if (vstartup_start + - (double) (vready_offset + vupdate_width + vupdate_offset) / htotal + <= vblank_end) + disp_dlg_regs->vready_after_vcount0 = 1; + else + disp_dlg_regs->vready_after_vcount0 = 0; + } + + // TODO: Where is this coming from? + if (interlaced) + vstartup_start = vstartup_start / 2; + + // TODO: What if this min_vblank doesn't match the value in the dml_config_settings.cpp? + if (vstartup_start >= min_vblank) { + dml_print("WARNING: DML_DLG: %s: vblank_start=%d vblank_end=%d\n", + __func__, + vblank_start, + vblank_end); + dml_print("WARNING: DML_DLG: %s: vstartup_start=%d should be less than min_vblank=%d\n", + __func__, + vstartup_start, + min_vblank); + min_vblank = vstartup_start + 1; + dml_print("WARNING: DML_DLG: %s: vstartup_start=%d should be less than min_vblank=%d\n", + __func__, + vstartup_start, + min_vblank); + } + + dst_x_after_scaler = get_dst_x_after_scaler(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); + dst_y_after_scaler = get_dst_y_after_scaler(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); + + dml_print("DML_DLG: %s: htotal = %d\n", __func__, htotal); + dml_print("DML_DLG: %s: pixel_rate_delay_subtotal = %d\n", + __func__, + pixel_rate_delay_subtotal); + dml_print("DML_DLG: %s: dst_x_after_scaler = %d\n", + __func__, + dst_x_after_scaler); + dml_print("DML_DLG: %s: dst_y_after_scaler = %d\n", + __func__, + dst_y_after_scaler); + + // Lwait + line_wait = mode_lib->soc.urgent_latency_us; + if (cstate_en) + line_wait = dml_max(mode_lib->soc.sr_enter_plus_exit_time_us, line_wait); + if (pstate_en) + line_wait = dml_max(mode_lib->soc.dram_clock_change_latency_us + + mode_lib->soc.urgent_latency_us, + line_wait); + line_wait = line_wait / line_time_in_us; + + dst_y_prefetch = get_dst_y_prefetch(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); + dml_print("DML_DLG: %s: dst_y_prefetch (after rnd) = %3.2f\n", __func__, dst_y_prefetch); + + dst_y_per_vm_vblank = get_dst_y_per_vm_vblank(mode_lib, + e2e_pipe_param, + num_pipes, + pipe_idx); + dst_y_per_row_vblank = get_dst_y_per_row_vblank(mode_lib, + e2e_pipe_param, + num_pipes, + pipe_idx); + dst_y_per_vm_flip = get_dst_y_per_vm_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); + dst_y_per_row_flip = get_dst_y_per_row_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); + + min_dst_y_per_vm_vblank = 8.0; + min_dst_y_per_row_vblank = 16.0; + + // magic! + if (htotal <= 75) { + min_vblank = 300; + min_dst_y_per_vm_vblank = 100.0; + min_dst_y_per_row_vblank = 100.0; + } + + dml_print("DML_DLG: %s: dst_y_per_vm_vblank = %3.2f\n", __func__, dst_y_per_vm_vblank); + dml_print("DML_DLG: %s: dst_y_per_row_vblank = %3.2f\n", __func__, dst_y_per_row_vblank); + + ASSERT(dst_y_per_vm_vblank < min_dst_y_per_vm_vblank); + ASSERT(dst_y_per_row_vblank < min_dst_y_per_row_vblank); + + ASSERT(dst_y_prefetch > (dst_y_per_vm_vblank + dst_y_per_row_vblank)); + lsw = dst_y_prefetch - (dst_y_per_vm_vblank + dst_y_per_row_vblank); + + dml_print("DML_DLG: %s: lsw = %3.2f\n", __func__, lsw); + + vratio_pre_l = get_vratio_prefetch_l(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); + vratio_pre_c = get_vratio_prefetch_c(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); + + dml_print("DML_DLG: %s: vratio_pre_l=%3.2f\n", __func__, vratio_pre_l); + dml_print("DML_DLG: %s: vratio_pre_c=%3.2f\n", __func__, vratio_pre_c); + + // Active + req_per_swath_ub_l = rq_dlg_param->rq_l.req_per_swath_ub; + req_per_swath_ub_c = rq_dlg_param->rq_c.req_per_swath_ub; + meta_row_height_l = rq_dlg_param->rq_l.meta_row_height; + meta_row_height_c = rq_dlg_param->rq_c.meta_row_height; + swath_width_pixels_ub_l = 0; + swath_width_pixels_ub_c = 0; + scaler_rec_in_width_l = 0; + scaler_rec_in_width_c = 0; + dpte_row_height_l = rq_dlg_param->rq_l.dpte_row_height; + dpte_row_height_c = rq_dlg_param->rq_c.dpte_row_height; + + if (mode_422) { + swath_width_pixels_ub_l = swath_width_ub_l * 2; // *2 for 2 pixel per element + swath_width_pixels_ub_c = swath_width_ub_c * 2; + } else { + swath_width_pixels_ub_l = swath_width_ub_l * 1; + swath_width_pixels_ub_c = swath_width_ub_c * 1; + } + + hscale_pixel_rate_l = 0.; + hscale_pixel_rate_c = 0.; + min_hratio_fact_l = 1.0; + min_hratio_fact_c = 1.0; + + if (htaps_l <= 1) + min_hratio_fact_l = 2.0; + else if (htaps_l <= 6) { + if ((hratio_l * 2.0) > 4.0) + min_hratio_fact_l = 4.0; + else + min_hratio_fact_l = hratio_l * 2.0; + } else { + if (hratio_l > 4.0) + min_hratio_fact_l = 4.0; + else + min_hratio_fact_l = hratio_l; + } + + hscale_pixel_rate_l = min_hratio_fact_l * dppclk_freq_in_mhz; + + if (htaps_c <= 1) + min_hratio_fact_c = 2.0; + else if (htaps_c <= 6) { + if ((hratio_c * 2.0) > 4.0) + min_hratio_fact_c = 4.0; + else + min_hratio_fact_c = hratio_c * 2.0; + } else { + if (hratio_c > 4.0) + min_hratio_fact_c = 4.0; + else + min_hratio_fact_c = hratio_c; + } + + hscale_pixel_rate_c = min_hratio_fact_c * dppclk_freq_in_mhz; + + refcyc_per_line_delivery_pre_l = 0.; + refcyc_per_line_delivery_pre_c = 0.; + refcyc_per_line_delivery_l = 0.; + refcyc_per_line_delivery_c = 0.; + + refcyc_per_req_delivery_pre_l = 0.; + refcyc_per_req_delivery_pre_c = 0.; + refcyc_per_req_delivery_l = 0.; + refcyc_per_req_delivery_c = 0.; + + full_recout_width = 0; + // In ODM + if (src->is_hsplit) { + // This "hack" is only allowed (and valid) for MPC combine. In ODM + // combine, you MUST specify the full_recout_width...according to Oswin + if (dst->full_recout_width == 0 && !dst->odm_combine) { + dml_print("DML_DLG: %s: Warning: full_recout_width not set in hsplit mode\n", + __func__); + full_recout_width = dst->recout_width * 2; // assume half split for dcn1 + } else + full_recout_width = dst->full_recout_width; + } else + full_recout_width = dst->recout_width; + + // As of DCN2, mpc_combine and odm_combine are mutually exclusive + refcyc_per_line_delivery_pre_l = get_refcyc_per_delivery(mode_lib, + refclk_freq_in_mhz, + pclk_freq_in_mhz, + dst->odm_combine, + full_recout_width, + dst->hactive, + vratio_pre_l, + hscale_pixel_rate_l, + swath_width_pixels_ub_l, + 1); // per line + + refcyc_per_line_delivery_l = get_refcyc_per_delivery(mode_lib, + refclk_freq_in_mhz, + pclk_freq_in_mhz, + dst->odm_combine, + full_recout_width, + dst->hactive, + vratio_l, + hscale_pixel_rate_l, + swath_width_pixels_ub_l, + 1); // per line + + dml_print("DML_DLG: %s: full_recout_width = %d\n", + __func__, + full_recout_width); + dml_print("DML_DLG: %s: hscale_pixel_rate_l = %3.2f\n", + __func__, + hscale_pixel_rate_l); + dml_print("DML_DLG: %s: refcyc_per_line_delivery_pre_l = %3.2f\n", + __func__, + refcyc_per_line_delivery_pre_l); + dml_print("DML_DLG: %s: refcyc_per_line_delivery_l = %3.2f\n", + __func__, + refcyc_per_line_delivery_l); + + if (dual_plane) { + refcyc_per_line_delivery_pre_c = get_refcyc_per_delivery(mode_lib, + refclk_freq_in_mhz, + pclk_freq_in_mhz, + dst->odm_combine, + full_recout_width, + dst->hactive, + vratio_pre_c, + hscale_pixel_rate_c, + swath_width_pixels_ub_c, + 1); // per line + + refcyc_per_line_delivery_c = get_refcyc_per_delivery(mode_lib, + refclk_freq_in_mhz, + pclk_freq_in_mhz, + dst->odm_combine, + full_recout_width, + dst->hactive, + vratio_c, + hscale_pixel_rate_c, + swath_width_pixels_ub_c, + 1); // per line + + dml_print("DML_DLG: %s: refcyc_per_line_delivery_pre_c = %3.2f\n", + __func__, + refcyc_per_line_delivery_pre_c); + dml_print("DML_DLG: %s: refcyc_per_line_delivery_c = %3.2f\n", + __func__, + refcyc_per_line_delivery_c); + } + + // TTU - Luma / Chroma + if (access_dir) { // vertical access + scaler_rec_in_width_l = vp_height_l; + scaler_rec_in_width_c = vp_height_c; + } else { + scaler_rec_in_width_l = vp_width_l; + scaler_rec_in_width_c = vp_width_c; + } + + refcyc_per_req_delivery_pre_l = get_refcyc_per_delivery(mode_lib, + refclk_freq_in_mhz, + pclk_freq_in_mhz, + dst->odm_combine, + full_recout_width, + dst->hactive, + vratio_pre_l, + hscale_pixel_rate_l, + scaler_rec_in_width_l, + req_per_swath_ub_l); // per req + refcyc_per_req_delivery_l = get_refcyc_per_delivery(mode_lib, + refclk_freq_in_mhz, + pclk_freq_in_mhz, + dst->odm_combine, + full_recout_width, + dst->hactive, + vratio_l, + hscale_pixel_rate_l, + scaler_rec_in_width_l, + req_per_swath_ub_l); // per req + + dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_l = %3.2f\n", + __func__, + refcyc_per_req_delivery_pre_l); + dml_print("DML_DLG: %s: refcyc_per_req_delivery_l = %3.2f\n", + __func__, + refcyc_per_req_delivery_l); + + ASSERT(refcyc_per_req_delivery_pre_l < dml_pow(2, 13)); + ASSERT(refcyc_per_req_delivery_l < dml_pow(2, 13)); + + if (dual_plane) { + refcyc_per_req_delivery_pre_c = get_refcyc_per_delivery(mode_lib, + refclk_freq_in_mhz, + pclk_freq_in_mhz, + dst->odm_combine, + full_recout_width, + dst->hactive, + vratio_pre_c, + hscale_pixel_rate_c, + scaler_rec_in_width_c, + req_per_swath_ub_c); // per req + refcyc_per_req_delivery_c = get_refcyc_per_delivery(mode_lib, + refclk_freq_in_mhz, + pclk_freq_in_mhz, + dst->odm_combine, + full_recout_width, + dst->hactive, + vratio_c, + hscale_pixel_rate_c, + scaler_rec_in_width_c, + req_per_swath_ub_c); // per req + + dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_c = %3.2f\n", + __func__, + refcyc_per_req_delivery_pre_c); + dml_print("DML_DLG: %s: refcyc_per_req_delivery_c = %3.2f\n", + __func__, + refcyc_per_req_delivery_c); + + ASSERT(refcyc_per_req_delivery_pre_c < dml_pow(2, 13)); + ASSERT(refcyc_per_req_delivery_c < dml_pow(2, 13)); + } + + // TTU - Cursor + refcyc_per_req_delivery_pre_cur0 = 0.0; + refcyc_per_req_delivery_cur0 = 0.0; + if (src->num_cursors > 0) { + calculate_ttu_cursor(mode_lib, + &refcyc_per_req_delivery_pre_cur0, + &refcyc_per_req_delivery_cur0, + refclk_freq_in_mhz, + ref_freq_to_pix_freq, + hscale_pixel_rate_l, + scl->hscl_ratio, + vratio_pre_l, + vratio_l, + src->cur0_src_width, + (enum cursor_bpp)(src->cur0_bpp)); + } + + refcyc_per_req_delivery_pre_cur1 = 0.0; + refcyc_per_req_delivery_cur1 = 0.0; + if (src->num_cursors > 1) { + calculate_ttu_cursor(mode_lib, + &refcyc_per_req_delivery_pre_cur1, + &refcyc_per_req_delivery_cur1, + refclk_freq_in_mhz, + ref_freq_to_pix_freq, + hscale_pixel_rate_l, + scl->hscl_ratio, + vratio_pre_l, + vratio_l, + src->cur1_src_width, + (enum cursor_bpp)(src->cur1_bpp)); + } + + // TTU - Misc + // all hard-coded + + // Assignment to register structures + disp_dlg_regs->dst_y_after_scaler = dst_y_after_scaler; // in terms of line + disp_dlg_regs->refcyc_x_after_scaler = dst_x_after_scaler * ref_freq_to_pix_freq; // in terms of refclk + ASSERT(disp_dlg_regs->refcyc_x_after_scaler < (unsigned int) dml_pow(2, 13)); + disp_dlg_regs->dst_y_prefetch = (unsigned int) (dst_y_prefetch * dml_pow(2, 2)); + disp_dlg_regs->dst_y_per_vm_vblank = (unsigned int) (dst_y_per_vm_vblank * dml_pow(2, 2)); + disp_dlg_regs->dst_y_per_row_vblank = (unsigned int) (dst_y_per_row_vblank * dml_pow(2, 2)); + disp_dlg_regs->dst_y_per_vm_flip = (unsigned int) (dst_y_per_vm_flip * dml_pow(2, 2)); + disp_dlg_regs->dst_y_per_row_flip = (unsigned int) (dst_y_per_row_flip * dml_pow(2, 2)); + + disp_dlg_regs->vratio_prefetch = (unsigned int) (vratio_pre_l * dml_pow(2, 19)); + disp_dlg_regs->vratio_prefetch_c = (unsigned int) (vratio_pre_c * dml_pow(2, 19)); + + disp_dlg_regs->refcyc_per_pte_group_vblank_l = + (unsigned int) (dst_y_per_row_vblank * (double) htotal + * ref_freq_to_pix_freq / (double) dpte_groups_per_row_ub_l); + ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_l < (unsigned int) dml_pow(2, 13)); + + if (dual_plane) { + disp_dlg_regs->refcyc_per_pte_group_vblank_c = (unsigned int) (dst_y_per_row_vblank + * (double) htotal * ref_freq_to_pix_freq + / (double) dpte_groups_per_row_ub_c); + ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_c + < (unsigned int) dml_pow(2, 13)); + } + + disp_dlg_regs->refcyc_per_meta_chunk_vblank_l = + (unsigned int) (dst_y_per_row_vblank * (double) htotal + * ref_freq_to_pix_freq / (double) meta_chunks_per_row_ub_l); + ASSERT(disp_dlg_regs->refcyc_per_meta_chunk_vblank_l < (unsigned int) dml_pow(2, 13)); + + disp_dlg_regs->refcyc_per_meta_chunk_vblank_c = + disp_dlg_regs->refcyc_per_meta_chunk_vblank_l; // dcc for 4:2:0 is not supported in dcn1.0. assigned to be the same as _l for now + + disp_dlg_regs->refcyc_per_pte_group_flip_l = (unsigned int) (dst_y_per_row_flip * htotal + * ref_freq_to_pix_freq) / dpte_groups_per_row_ub_l; + disp_dlg_regs->refcyc_per_meta_chunk_flip_l = (unsigned int) (dst_y_per_row_flip * htotal + * ref_freq_to_pix_freq) / meta_chunks_per_row_ub_l; + + if (dual_plane) { + disp_dlg_regs->refcyc_per_pte_group_flip_c = (unsigned int) (dst_y_per_row_flip + * htotal * ref_freq_to_pix_freq) / dpte_groups_per_row_ub_c; + disp_dlg_regs->refcyc_per_meta_chunk_flip_c = (unsigned int) (dst_y_per_row_flip + * htotal * ref_freq_to_pix_freq) / meta_chunks_per_row_ub_c; + } + + disp_dlg_regs->dst_y_per_pte_row_nom_l = (unsigned int) ((double) dpte_row_height_l + / (double) vratio_l * dml_pow(2, 2)); + ASSERT(disp_dlg_regs->dst_y_per_pte_row_nom_l < (unsigned int) dml_pow(2, 17)); + + if (dual_plane) { + disp_dlg_regs->dst_y_per_pte_row_nom_c = (unsigned int) ((double) dpte_row_height_c + / (double) vratio_c * dml_pow(2, 2)); + if (disp_dlg_regs->dst_y_per_pte_row_nom_c >= (unsigned int) dml_pow(2, 17)) { + dml_print("DML_DLG: %s: Warning dst_y_per_pte_row_nom_c %u larger than supported by register format U15.2 %u\n", + __func__, + disp_dlg_regs->dst_y_per_pte_row_nom_c, + (unsigned int) dml_pow(2, 17) - 1); + } + } + + disp_dlg_regs->dst_y_per_meta_row_nom_l = (unsigned int) ((double) meta_row_height_l + / (double) vratio_l * dml_pow(2, 2)); + ASSERT(disp_dlg_regs->dst_y_per_meta_row_nom_l < (unsigned int) dml_pow(2, 17)); + + disp_dlg_regs->dst_y_per_meta_row_nom_c = disp_dlg_regs->dst_y_per_meta_row_nom_l; // TODO: dcc for 4:2:0 is not supported in dcn1.0. assigned to be the same as _l for now + + disp_dlg_regs->refcyc_per_pte_group_nom_l = (unsigned int) ((double) dpte_row_height_l + / (double) vratio_l * (double) htotal * ref_freq_to_pix_freq + / (double) dpte_groups_per_row_ub_l); + if (disp_dlg_regs->refcyc_per_pte_group_nom_l >= (unsigned int) dml_pow(2, 23)) + disp_dlg_regs->refcyc_per_pte_group_nom_l = dml_pow(2, 23) - 1; + disp_dlg_regs->refcyc_per_meta_chunk_nom_l = (unsigned int) ((double) meta_row_height_l + / (double) vratio_l * (double) htotal * ref_freq_to_pix_freq + / (double) meta_chunks_per_row_ub_l); + if (disp_dlg_regs->refcyc_per_meta_chunk_nom_l >= (unsigned int) dml_pow(2, 23)) + disp_dlg_regs->refcyc_per_meta_chunk_nom_l = dml_pow(2, 23) - 1; + + if (dual_plane) { + disp_dlg_regs->refcyc_per_pte_group_nom_c = + (unsigned int) ((double) dpte_row_height_c / (double) vratio_c + * (double) htotal * ref_freq_to_pix_freq + / (double) dpte_groups_per_row_ub_c); + if (disp_dlg_regs->refcyc_per_pte_group_nom_c >= (unsigned int) dml_pow(2, 23)) + disp_dlg_regs->refcyc_per_pte_group_nom_c = dml_pow(2, 23) - 1; + + // TODO: Is this the right calculation? Does htotal need to be halved? + disp_dlg_regs->refcyc_per_meta_chunk_nom_c = + (unsigned int) ((double) meta_row_height_c / (double) vratio_c + * (double) htotal * ref_freq_to_pix_freq + / (double) meta_chunks_per_row_ub_c); + if (disp_dlg_regs->refcyc_per_meta_chunk_nom_c >= (unsigned int) dml_pow(2, 23)) + disp_dlg_regs->refcyc_per_meta_chunk_nom_c = dml_pow(2, 23) - 1; + } + + disp_dlg_regs->refcyc_per_line_delivery_pre_l = (unsigned int) dml_floor(refcyc_per_line_delivery_pre_l, + 1); + disp_dlg_regs->refcyc_per_line_delivery_l = (unsigned int) dml_floor(refcyc_per_line_delivery_l, + 1); + ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_l < (unsigned int) dml_pow(2, 13)); + ASSERT(disp_dlg_regs->refcyc_per_line_delivery_l < (unsigned int) dml_pow(2, 13)); + + disp_dlg_regs->refcyc_per_line_delivery_pre_c = (unsigned int) dml_floor(refcyc_per_line_delivery_pre_c, + 1); + disp_dlg_regs->refcyc_per_line_delivery_c = (unsigned int) dml_floor(refcyc_per_line_delivery_c, + 1); + ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_c < (unsigned int) dml_pow(2, 13)); + ASSERT(disp_dlg_regs->refcyc_per_line_delivery_c < (unsigned int) dml_pow(2, 13)); + + disp_dlg_regs->chunk_hdl_adjust_cur0 = 3; + disp_dlg_regs->dst_y_offset_cur0 = 0; + disp_dlg_regs->chunk_hdl_adjust_cur1 = 3; + disp_dlg_regs->dst_y_offset_cur1 = 0; + + disp_dlg_regs->dst_y_delta_drq_limit = 0x7fff; // off + + disp_ttu_regs->refcyc_per_req_delivery_pre_l = (unsigned int) (refcyc_per_req_delivery_pre_l + * dml_pow(2, 10)); + disp_ttu_regs->refcyc_per_req_delivery_l = (unsigned int) (refcyc_per_req_delivery_l + * dml_pow(2, 10)); + disp_ttu_regs->refcyc_per_req_delivery_pre_c = (unsigned int) (refcyc_per_req_delivery_pre_c + * dml_pow(2, 10)); + disp_ttu_regs->refcyc_per_req_delivery_c = (unsigned int) (refcyc_per_req_delivery_c + * dml_pow(2, 10)); + disp_ttu_regs->refcyc_per_req_delivery_pre_cur0 = + (unsigned int) (refcyc_per_req_delivery_pre_cur0 * dml_pow(2, 10)); + disp_ttu_regs->refcyc_per_req_delivery_cur0 = (unsigned int) (refcyc_per_req_delivery_cur0 + * dml_pow(2, 10)); + disp_ttu_regs->refcyc_per_req_delivery_pre_cur1 = + (unsigned int) (refcyc_per_req_delivery_pre_cur1 * dml_pow(2, 10)); + disp_ttu_regs->refcyc_per_req_delivery_cur1 = (unsigned int) (refcyc_per_req_delivery_cur1 + * dml_pow(2, 10)); + disp_ttu_regs->qos_level_low_wm = 0; + ASSERT(disp_ttu_regs->qos_level_low_wm < dml_pow(2, 14)); + disp_ttu_regs->qos_level_high_wm = (unsigned int) (4.0 * (double) htotal + * ref_freq_to_pix_freq); + /*ASSERT(disp_ttu_regs->qos_level_high_wm < dml_pow(2, 14));*/ + + disp_ttu_regs->qos_level_flip = 14; + disp_ttu_regs->qos_level_fixed_l = 8; + disp_ttu_regs->qos_level_fixed_c = 8; + disp_ttu_regs->qos_level_fixed_cur0 = 8; + disp_ttu_regs->qos_ramp_disable_l = 0; + disp_ttu_regs->qos_ramp_disable_c = 0; + disp_ttu_regs->qos_ramp_disable_cur0 = 0; + + disp_ttu_regs->min_ttu_vblank = min_ttu_vblank * refclk_freq_in_mhz; + ASSERT(disp_ttu_regs->min_ttu_vblank < dml_pow(2, 24)); + + print__ttu_regs_st(mode_lib, disp_ttu_regs); + print__dlg_regs_st(mode_lib, disp_dlg_regs); +} + +void dml20v2_rq_dlg_get_dlg_reg(struct display_mode_lib *mode_lib, + display_dlg_regs_st *dlg_regs, + display_ttu_regs_st *ttu_regs, + const display_e2e_pipe_params_st *e2e_pipe_param, + const unsigned int num_pipes, + const unsigned int pipe_idx, + const bool cstate_en, + const bool pstate_en, + const bool vm_en, + const bool ignore_viewport_pos, + const bool immediate_flip_support) +{ + display_rq_params_st rq_param = {0}; + display_dlg_sys_params_st dlg_sys_param = {0}; + + // Get watermark and Tex. + dlg_sys_param.t_urg_wm_us = get_wm_urgent(mode_lib, e2e_pipe_param, num_pipes); + dlg_sys_param.deepsleep_dcfclk_mhz = get_clk_dcf_deepsleep(mode_lib, + e2e_pipe_param, + num_pipes); + dlg_sys_param.t_extra_us = get_urgent_extra_latency(mode_lib, e2e_pipe_param, num_pipes); + dlg_sys_param.mem_trip_us = get_wm_memory_trip(mode_lib, e2e_pipe_param, num_pipes); + dlg_sys_param.t_mclk_wm_us = get_wm_dram_clock_change(mode_lib, e2e_pipe_param, num_pipes); + dlg_sys_param.t_sr_wm_us = get_wm_stutter_enter_exit(mode_lib, e2e_pipe_param, num_pipes); + dlg_sys_param.total_flip_bw = get_total_immediate_flip_bw(mode_lib, + e2e_pipe_param, + num_pipes); + dlg_sys_param.total_flip_bytes = get_total_immediate_flip_bytes(mode_lib, + e2e_pipe_param, + num_pipes); + + print__dlg_sys_params_st(mode_lib, &dlg_sys_param); + + // system parameter calculation done + + dml_print("DML_DLG: Calculation for pipe[%d] start\n\n", pipe_idx); + dml20v2_rq_dlg_get_rq_params(mode_lib, &rq_param, &e2e_pipe_param[pipe_idx].pipe.src); + dml20v2_rq_dlg_get_dlg_params(mode_lib, + e2e_pipe_param, + num_pipes, + pipe_idx, + dlg_regs, + ttu_regs, + &rq_param.dlg, + &dlg_sys_param, + cstate_en, + pstate_en); + dml_print("DML_DLG: Calculation for pipe[%d] end\n", pipe_idx); +} + +static void calculate_ttu_cursor(struct display_mode_lib *mode_lib, + double *refcyc_per_req_delivery_pre_cur, + double *refcyc_per_req_delivery_cur, + double refclk_freq_in_mhz, + double ref_freq_to_pix_freq, + double hscale_pixel_rate_l, + double hscl_ratio, + double vratio_pre_l, + double vratio_l, + unsigned int cur_width, + enum cursor_bpp cur_bpp) +{ + unsigned int cur_src_width = cur_width; + unsigned int cur_req_size = 0; + unsigned int cur_req_width = 0; + double cur_width_ub = 0.0; + double cur_req_per_width = 0.0; + double hactive_cur = 0.0; + + ASSERT(cur_src_width <= 256); + + *refcyc_per_req_delivery_pre_cur = 0.0; + *refcyc_per_req_delivery_cur = 0.0; + if (cur_src_width > 0) { + unsigned int cur_bit_per_pixel = 0; + + if (cur_bpp == dm_cur_2bit) { + cur_req_size = 64; // byte + cur_bit_per_pixel = 2; + } else { // 32bit + cur_bit_per_pixel = 32; + if (cur_src_width >= 1 && cur_src_width <= 16) + cur_req_size = 64; + else if (cur_src_width >= 17 && cur_src_width <= 31) + cur_req_size = 128; + else + cur_req_size = 256; + } + + cur_req_width = (double) cur_req_size / ((double) cur_bit_per_pixel / 8.0); + cur_width_ub = dml_ceil((double) cur_src_width / (double) cur_req_width, 1) + * (double) cur_req_width; + cur_req_per_width = cur_width_ub / (double) cur_req_width; + hactive_cur = (double) cur_src_width / hscl_ratio; // TODO: oswin to think about what to do for cursor + + if (vratio_pre_l <= 1.0) { + *refcyc_per_req_delivery_pre_cur = hactive_cur * ref_freq_to_pix_freq + / (double) cur_req_per_width; + } else { + *refcyc_per_req_delivery_pre_cur = (double) refclk_freq_in_mhz + * (double) cur_src_width / hscale_pixel_rate_l + / (double) cur_req_per_width; + } + + ASSERT(*refcyc_per_req_delivery_pre_cur < dml_pow(2, 13)); + + if (vratio_l <= 1.0) { + *refcyc_per_req_delivery_cur = hactive_cur * ref_freq_to_pix_freq + / (double) cur_req_per_width; + } else { + *refcyc_per_req_delivery_cur = (double) refclk_freq_in_mhz + * (double) cur_src_width / hscale_pixel_rate_l + / (double) cur_req_per_width; + } + + dml_print("DML_DLG: %s: cur_req_width = %d\n", + __func__, + cur_req_width); + dml_print("DML_DLG: %s: cur_width_ub = %3.2f\n", + __func__, + cur_width_ub); + dml_print("DML_DLG: %s: cur_req_per_width = %3.2f\n", + __func__, + cur_req_per_width); + dml_print("DML_DLG: %s: hactive_cur = %3.2f\n", + __func__, + hactive_cur); + dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_cur = %3.2f\n", + __func__, + *refcyc_per_req_delivery_pre_cur); + dml_print("DML_DLG: %s: refcyc_per_req_delivery_cur = %3.2f\n", + __func__, + *refcyc_per_req_delivery_cur); + + ASSERT(*refcyc_per_req_delivery_cur < dml_pow(2, 13)); + } +} diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.h b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.h new file mode 100644 index 000000000..2b4e46ea1 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.h @@ -0,0 +1,73 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DML20V2_DISPLAY_RQ_DLG_CALC_H__ +#define __DML20V2_DISPLAY_RQ_DLG_CALC_H__ + +#include "../display_rq_dlg_helpers.h" + +struct display_mode_lib; + + +// Function: dml_rq_dlg_get_rq_reg +// Main entry point for test to get the register values out of this DML class. +// This function calls <get_rq_param> and <extract_rq_regs> functions to calculate +// and then populate the rq_regs struct +// Input: +// pipe_src_param - pipe source configuration (e.g. vp, pitch, etc.) +// Output: +// rq_regs - struct that holds all the RQ registers field value. +// See also: <display_rq_regs_st> +void dml20v2_rq_dlg_get_rq_reg( + struct display_mode_lib *mode_lib, + display_rq_regs_st *rq_regs, + const display_pipe_params_st *pipe_param); + + +// Function: dml_rq_dlg_get_dlg_reg +// Calculate and return DLG and TTU register struct given the system setting +// Output: +// dlg_regs - output DLG register struct +// ttu_regs - output DLG TTU register struct +// Input: +// e2e_pipe_param - "compacted" array of e2e pipe param struct +// num_pipes - num of active "pipe" or "route" +// pipe_idx - index that identifies the e2e_pipe_param that corresponding to this dlg +// cstate - 0: when calculate min_ttu_vblank it is assumed cstate is not required. 1: Normal mode, cstate is considered. +// Added for legacy or unrealistic timing tests. +void dml20v2_rq_dlg_get_dlg_reg( + struct display_mode_lib *mode_lib, + display_dlg_regs_st *dlg_regs, + display_ttu_regs_st *ttu_regs, + const display_e2e_pipe_params_st *e2e_pipe_param, + const unsigned int num_pipes, + const unsigned int pipe_idx, + const bool cstate_en, + const bool pstate_en, + const bool vm_en, + const bool ignore_viewport_pos, + const bool immediate_flip_support); + +#endif |