diff options
Diffstat (limited to 'drivers/gpu/drm/amd/display/dc/dml/dcn32')
8 files changed, 14614 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c new file mode 100644 index 000000000..85e0d1c2a --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -0,0 +1,2593 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ +#include "dcn32_fpu.h" +#include "dc_link_dp.h" +#include "dcn32/dcn32_resource.h" +#include "dcn20/dcn20_resource.h" +#include "display_mode_vba_util_32.h" +// We need this includes for WATERMARKS_* defines +#include "clk_mgr/dcn32/dcn32_smu13_driver_if.h" +#include "dcn30/dcn30_resource.h" + +#define DC_LOGGER_INIT(logger) + +struct _vcs_dpi_ip_params_st dcn3_2_ip = { + .gpuvm_enable = 0, + .gpuvm_max_page_table_levels = 4, + .hostvm_enable = 0, + .rob_buffer_size_kbytes = 128, + .det_buffer_size_kbytes = DCN3_2_DEFAULT_DET_SIZE, + .config_return_buffer_size_in_kbytes = 1280, + .compressed_buffer_segment_size_in_kbytes = 64, + .meta_fifo_size_in_kentries = 22, + .zero_size_buffer_entries = 512, + .compbuf_reserved_space_64b = 256, + .compbuf_reserved_space_zs = 64, + .dpp_output_buffer_pixels = 2560, + .opp_output_buffer_lines = 1, + .pixel_chunk_size_kbytes = 8, + .alpha_pixel_chunk_size_kbytes = 4, + .min_pixel_chunk_size_bytes = 1024, + .dcc_meta_buffer_size_bytes = 6272, + .meta_chunk_size_kbytes = 2, + .min_meta_chunk_size_bytes = 256, + .writeback_chunk_size_kbytes = 8, + .ptoi_supported = false, + .num_dsc = 4, + .maximum_dsc_bits_per_component = 12, + .maximum_pixels_per_line_per_dsc_unit = 6016, + .dsc422_native_support = true, + .is_line_buffer_bpp_fixed = true, + .line_buffer_fixed_bpp = 57, + .line_buffer_size_bits = 1171920, + .max_line_buffer_lines = 32, + .writeback_interface_buffer_size_kbytes = 90, + .max_num_dpp = 4, + .max_num_otg = 4, + .max_num_hdmi_frl_outputs = 1, + .max_num_wb = 1, + .max_dchub_pscl_bw_pix_per_clk = 4, + .max_pscl_lb_bw_pix_per_clk = 2, + .max_lb_vscl_bw_pix_per_clk = 4, + .max_vscl_hscl_bw_pix_per_clk = 4, + .max_hscl_ratio = 6, + .max_vscl_ratio = 6, + .max_hscl_taps = 8, + .max_vscl_taps = 8, + .dpte_buffer_size_in_pte_reqs_luma = 64, + .dpte_buffer_size_in_pte_reqs_chroma = 34, + .dispclk_ramp_margin_percent = 1, + .max_inter_dcn_tile_repeaters = 8, + .cursor_buffer_size = 16, + .cursor_chunk_size = 2, + .writeback_line_buffer_buffer_size = 0, + .writeback_min_hscl_ratio = 1, + .writeback_min_vscl_ratio = 1, + .writeback_max_hscl_ratio = 1, + .writeback_max_vscl_ratio = 1, + .writeback_max_hscl_taps = 1, + .writeback_max_vscl_taps = 1, + .dppclk_delay_subtotal = 47, + .dppclk_delay_scl = 50, + .dppclk_delay_scl_lb_only = 16, + .dppclk_delay_cnvc_formatter = 28, + .dppclk_delay_cnvc_cursor = 6, + .dispclk_delay_subtotal = 125, + .dynamic_metadata_vm_enabled = false, + .odm_combine_4to1_supported = false, + .dcc_supported = true, + .max_num_dp2p0_outputs = 2, + .max_num_dp2p0_streams = 4, +}; + +struct _vcs_dpi_soc_bounding_box_st dcn3_2_soc = { + .clock_limits = { + { + .state = 0, + .dcfclk_mhz = 1564.0, + .fabricclk_mhz = 400.0, + .dispclk_mhz = 2150.0, + .dppclk_mhz = 2150.0, + .phyclk_mhz = 810.0, + .phyclk_d18_mhz = 667.0, + .phyclk_d32_mhz = 625.0, + .socclk_mhz = 1200.0, + .dscclk_mhz = 716.667, + .dram_speed_mts = 16000.0, + .dtbclk_mhz = 1564.0, + }, + }, + .num_states = 1, + .sr_exit_time_us = 42.97, + .sr_enter_plus_exit_time_us = 49.94, + .sr_exit_z8_time_us = 285.0, + .sr_enter_plus_exit_z8_time_us = 320, + .writeback_latency_us = 12.0, + .round_trip_ping_latency_dcfclk_cycles = 263, + .urgent_latency_pixel_data_only_us = 4.0, + .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, + .urgent_latency_vm_data_only_us = 4.0, + .fclk_change_latency_us = 20, + .usr_retraining_latency_us = 2, + .smn_latency_us = 2, + .mall_allocated_for_dcn_mbytes = 64, + .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096, + .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096, + .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096, + .pct_ideal_sdp_bw_after_urgent = 90.0, + .pct_ideal_fabric_bw_after_urgent = 67.0, + .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 20.0, + .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, // N/A, for now keep as is until DML implemented + .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0, // N/A, for now keep as is until DML implemented + .pct_ideal_dram_bw_after_urgent_strobe = 67.0, + .max_avg_sdp_bw_use_normal_percent = 80.0, + .max_avg_fabric_bw_use_normal_percent = 60.0, + .max_avg_dram_bw_use_normal_strobe_percent = 50.0, + .max_avg_dram_bw_use_normal_percent = 15.0, + .num_chans = 8, + .dram_channel_width_bytes = 2, + .fabric_datapath_to_dcn_data_return_bytes = 64, + .return_bus_width_bytes = 64, + .downspread_percent = 0.38, + .dcn_downspread_percent = 0.5, + .dram_clock_change_latency_us = 400, + .dispclk_dppclk_vco_speed_mhz = 4300.0, + .do_urgent_latency_adjustment = true, + .urgent_latency_adjustment_fabric_clock_component_us = 1.0, + .urgent_latency_adjustment_fabric_clock_reference_mhz = 3000, +}; + +void dcn32_build_wm_range_table_fpu(struct clk_mgr_internal *clk_mgr) +{ + /* defaults */ + double pstate_latency_us = clk_mgr->base.ctx->dc->dml.soc.dram_clock_change_latency_us; + double fclk_change_latency_us = clk_mgr->base.ctx->dc->dml.soc.fclk_change_latency_us; + double sr_exit_time_us = clk_mgr->base.ctx->dc->dml.soc.sr_exit_time_us; + double sr_enter_plus_exit_time_us = clk_mgr->base.ctx->dc->dml.soc.sr_enter_plus_exit_time_us; + /* For min clocks use as reported by PM FW and report those as min */ + uint16_t min_uclk_mhz = clk_mgr->base.bw_params->clk_table.entries[0].memclk_mhz; + uint16_t min_dcfclk_mhz = clk_mgr->base.bw_params->clk_table.entries[0].dcfclk_mhz; + uint16_t setb_min_uclk_mhz = min_uclk_mhz; + uint16_t dcfclk_mhz_for_the_second_state = clk_mgr->base.ctx->dc->dml.soc.clock_limits[2].dcfclk_mhz; + + dc_assert_fp_enabled(); + + /* For Set B ranges use min clocks state 2 when available, and report those to PM FW */ + if (dcfclk_mhz_for_the_second_state) + clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_dcfclk = dcfclk_mhz_for_the_second_state; + else + clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_dcfclk = clk_mgr->base.bw_params->clk_table.entries[0].dcfclk_mhz; + + if (clk_mgr->base.bw_params->clk_table.entries[2].memclk_mhz) + setb_min_uclk_mhz = clk_mgr->base.bw_params->clk_table.entries[2].memclk_mhz; + + /* Set A - Normal - default values */ + clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].valid = true; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us = pstate_latency_us; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.fclk_change_latency_us = fclk_change_latency_us; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.sr_exit_time_us = sr_exit_time_us; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.wm_type = WATERMARKS_CLOCK_RANGE; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_dcfclk = min_dcfclk_mhz; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_dcfclk = 0xFFFF; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_uclk = min_uclk_mhz; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_uclk = 0xFFFF; + + /* Set B - Performance - higher clocks, using DPM[2] DCFCLK and UCLK */ + clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].valid = true; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us = pstate_latency_us; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].dml_input.fclk_change_latency_us = fclk_change_latency_us; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us = sr_exit_time_us; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.wm_type = WATERMARKS_CLOCK_RANGE; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.max_dcfclk = 0xFFFF; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_uclk = setb_min_uclk_mhz; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.max_uclk = 0xFFFF; + + /* Set C - Dummy P-State - P-State latency set to "dummy p-state" value */ + /* 'DalDummyClockChangeLatencyNs' registry key option set to 0x7FFFFFFF can be used to disable Set C for dummy p-state */ + if (clk_mgr->base.ctx->dc->bb_overrides.dummy_clock_change_latency_ns != 0x7FFFFFFF) { + clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].valid = true; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.pstate_latency_us = 50; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.fclk_change_latency_us = fclk_change_latency_us; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us = sr_exit_time_us; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.wm_type = WATERMARKS_DUMMY_PSTATE; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_dcfclk = min_dcfclk_mhz; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_dcfclk = 0xFFFF; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_uclk = min_uclk_mhz; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_uclk = 0xFFFF; + clk_mgr->base.bw_params->dummy_pstate_table[0].dram_speed_mts = clk_mgr->base.bw_params->clk_table.entries[0].memclk_mhz * 16; + clk_mgr->base.bw_params->dummy_pstate_table[0].dummy_pstate_latency_us = 50; + clk_mgr->base.bw_params->dummy_pstate_table[1].dram_speed_mts = clk_mgr->base.bw_params->clk_table.entries[1].memclk_mhz * 16; + clk_mgr->base.bw_params->dummy_pstate_table[1].dummy_pstate_latency_us = 9; + clk_mgr->base.bw_params->dummy_pstate_table[2].dram_speed_mts = clk_mgr->base.bw_params->clk_table.entries[2].memclk_mhz * 16; + clk_mgr->base.bw_params->dummy_pstate_table[2].dummy_pstate_latency_us = 8; + clk_mgr->base.bw_params->dummy_pstate_table[3].dram_speed_mts = clk_mgr->base.bw_params->clk_table.entries[3].memclk_mhz * 16; + clk_mgr->base.bw_params->dummy_pstate_table[3].dummy_pstate_latency_us = 5; + } + /* Set D - MALL - SR enter and exit time specific to MALL, TBD after bringup or later phase for now use DRAM values / 2 */ + /* For MALL DRAM clock change latency is N/A, for watermak calculations use lowest value dummy P state latency */ + clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].valid = true; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us = clk_mgr->base.bw_params->dummy_pstate_table[3].dummy_pstate_latency_us; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.fclk_change_latency_us = fclk_change_latency_us; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us = sr_exit_time_us / 2; // TBD + clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us / 2; // TBD + clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.wm_type = WATERMARKS_MALL; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_dcfclk = min_dcfclk_mhz; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_dcfclk = 0xFFFF; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_uclk = min_uclk_mhz; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_uclk = 0xFFFF; +} + +/* + * Finds dummy_latency_index when MCLK switching using firmware based + * vblank stretch is enabled. This function will iterate through the + * table of dummy pstate latencies until the lowest value that allows + * dm_allow_self_refresh_and_mclk_switch to happen is found + */ +int dcn32_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt, + int vlevel) +{ + const int max_latency_table_entries = 4; + const struct vba_vars_st *vba = &context->bw_ctx.dml.vba; + int dummy_latency_index = 0; + + dc_assert_fp_enabled(); + + while (dummy_latency_index < max_latency_table_entries) { + context->bw_ctx.dml.soc.dram_clock_change_latency_us = + dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dummy_pstate_latency_us; + dcn32_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, false); + + if (vlevel < context->bw_ctx.dml.vba.soc.num_states && + vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] != dm_dram_clock_change_unsupported) + break; + + dummy_latency_index++; + } + + if (dummy_latency_index == max_latency_table_entries) { + ASSERT(dummy_latency_index != max_latency_table_entries); + /* If the execution gets here, it means dummy p_states are + * not possible. This should never happen and would mean + * something is severely wrong. + * Here we reset dummy_latency_index to 3, because it is + * better to have underflows than system crashes. + */ + dummy_latency_index = max_latency_table_entries - 1; + } + + return dummy_latency_index; +} + +/** + * dcn32_helper_populate_phantom_dlg_params - Get DLG params for phantom pipes + * and populate pipe_ctx with those params. + * @dc: [in] current dc state + * @context: [in] new dc state + * @pipes: [in] DML pipe params array + * @pipe_cnt: [in] DML pipe count + * + * This function must be called AFTER the phantom pipes are added to context + * and run through DML (so that the DLG params for the phantom pipes can be + * populated), and BEFORE we program the timing for the phantom pipes. + */ +void dcn32_helper_populate_phantom_dlg_params(struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt) +{ + uint32_t i, pipe_idx; + + dc_assert_fp_enabled(); + + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + if (!pipe->stream) + continue; + + if (pipe->plane_state && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { + pipes[pipe_idx].pipe.dest.vstartup_start = + get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); + pipes[pipe_idx].pipe.dest.vupdate_offset = + get_vupdate_offset(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); + pipes[pipe_idx].pipe.dest.vupdate_width = + get_vupdate_width(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); + pipes[pipe_idx].pipe.dest.vready_offset = + get_vready_offset(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); + pipe->pipe_dlg_param = pipes[pipe_idx].pipe.dest; + } + pipe_idx++; + } +} + +/** + * dcn32_predict_pipe_split - Predict if pipe split will occur for a given DML pipe + * @context: [in] New DC state to be programmed + * @pipe_e2e: [in] DML pipe end to end context + * + * This function takes in a DML pipe (pipe_e2e) and predicts if pipe split is required (both + * ODM and MPC). For pipe split, ODM combine is determined by the ODM mode, and MPC combine is + * determined by DPPClk requirements + * + * This function follows the same policy as DML: + * - Check for ODM combine requirements / policy first + * - MPC combine is only chosen if there is no ODM combine requirements / policy in place, and + * MPC is required + * + * Return: Number of splits expected (1 for 2:1 split, 3 for 4:1 split, 0 for no splits). + */ +uint8_t dcn32_predict_pipe_split(struct dc_state *context, + display_e2e_pipe_params_st *pipe_e2e) +{ + double pscl_throughput; + double pscl_throughput_chroma; + double dpp_clk_single_dpp, clock; + double clk_frequency = 0.0; + double vco_speed = context->bw_ctx.dml.soc.dispclk_dppclk_vco_speed_mhz; + bool total_available_pipes_support = false; + uint32_t number_of_dpp = 0; + enum odm_combine_mode odm_mode = dm_odm_combine_mode_disabled; + double req_dispclk_per_surface = 0; + uint8_t num_splits = 0; + + dc_assert_fp_enabled(); + + dml32_CalculateODMMode(context->bw_ctx.dml.ip.maximum_pixels_per_line_per_dsc_unit, + pipe_e2e->pipe.dest.hactive, + pipe_e2e->dout.output_format, + pipe_e2e->dout.output_type, + pipe_e2e->pipe.dest.odm_combine_policy, + context->bw_ctx.dml.soc.clock_limits[context->bw_ctx.dml.soc.num_states - 1].dispclk_mhz, + context->bw_ctx.dml.soc.clock_limits[context->bw_ctx.dml.soc.num_states - 1].dispclk_mhz, + pipe_e2e->dout.dsc_enable != 0, + 0, /* TotalNumberOfActiveDPP can be 0 since we're predicting pipe split requirement */ + context->bw_ctx.dml.ip.max_num_dpp, + pipe_e2e->pipe.dest.pixel_rate_mhz, + context->bw_ctx.dml.soc.dcn_downspread_percent, + context->bw_ctx.dml.ip.dispclk_ramp_margin_percent, + context->bw_ctx.dml.soc.dispclk_dppclk_vco_speed_mhz, + pipe_e2e->dout.dsc_slices, + /* Output */ + &total_available_pipes_support, + &number_of_dpp, + &odm_mode, + &req_dispclk_per_surface); + + dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(pipe_e2e->pipe.scale_ratio_depth.hscl_ratio, + pipe_e2e->pipe.scale_ratio_depth.hscl_ratio_c, + pipe_e2e->pipe.scale_ratio_depth.vscl_ratio, + pipe_e2e->pipe.scale_ratio_depth.vscl_ratio_c, + context->bw_ctx.dml.ip.max_dchub_pscl_bw_pix_per_clk, + context->bw_ctx.dml.ip.max_pscl_lb_bw_pix_per_clk, + pipe_e2e->pipe.dest.pixel_rate_mhz, + pipe_e2e->pipe.src.source_format, + pipe_e2e->pipe.scale_taps.htaps, + pipe_e2e->pipe.scale_taps.htaps_c, + pipe_e2e->pipe.scale_taps.vtaps, + pipe_e2e->pipe.scale_taps.vtaps_c, + /* Output */ + &pscl_throughput, &pscl_throughput_chroma, + &dpp_clk_single_dpp); + + clock = dpp_clk_single_dpp * (1 + context->bw_ctx.dml.soc.dcn_downspread_percent / 100); + + if (clock > 0) + clk_frequency = vco_speed * 4.0 / ((int)(vco_speed * 4.0) / clock); + + if (odm_mode == dm_odm_combine_mode_2to1) + num_splits = 1; + else if (odm_mode == dm_odm_combine_mode_4to1) + num_splits = 3; + else if (clk_frequency > context->bw_ctx.dml.soc.clock_limits[context->bw_ctx.dml.soc.num_states - 1].dppclk_mhz) + num_splits = 1; + + return num_splits; +} + +static float calculate_net_bw_in_kbytes_sec(struct _vcs_dpi_voltage_scaling_st *entry) +{ + float memory_bw_kbytes_sec; + float fabric_bw_kbytes_sec; + float sdp_bw_kbytes_sec; + float limiting_bw_kbytes_sec; + + memory_bw_kbytes_sec = entry->dram_speed_mts * + dcn3_2_soc.num_chans * + dcn3_2_soc.dram_channel_width_bytes * + ((float)dcn3_2_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100); + + fabric_bw_kbytes_sec = entry->fabricclk_mhz * + dcn3_2_soc.return_bus_width_bytes * + ((float)dcn3_2_soc.pct_ideal_fabric_bw_after_urgent / 100); + + sdp_bw_kbytes_sec = entry->dcfclk_mhz * + dcn3_2_soc.return_bus_width_bytes * + ((float)dcn3_2_soc.pct_ideal_sdp_bw_after_urgent / 100); + + limiting_bw_kbytes_sec = memory_bw_kbytes_sec; + + if (fabric_bw_kbytes_sec < limiting_bw_kbytes_sec) + limiting_bw_kbytes_sec = fabric_bw_kbytes_sec; + + if (sdp_bw_kbytes_sec < limiting_bw_kbytes_sec) + limiting_bw_kbytes_sec = sdp_bw_kbytes_sec; + + return limiting_bw_kbytes_sec; +} + +static void get_optimal_ntuple(struct _vcs_dpi_voltage_scaling_st *entry) +{ + if (entry->dcfclk_mhz > 0) { + float bw_on_sdp = entry->dcfclk_mhz * dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_sdp_bw_after_urgent / 100); + + entry->fabricclk_mhz = bw_on_sdp / (dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_fabric_bw_after_urgent / 100)); + entry->dram_speed_mts = bw_on_sdp / (dcn3_2_soc.num_chans * + dcn3_2_soc.dram_channel_width_bytes * ((float)dcn3_2_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100)); + } else if (entry->fabricclk_mhz > 0) { + float bw_on_fabric = entry->fabricclk_mhz * dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_fabric_bw_after_urgent / 100); + + entry->dcfclk_mhz = bw_on_fabric / (dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_sdp_bw_after_urgent / 100)); + entry->dram_speed_mts = bw_on_fabric / (dcn3_2_soc.num_chans * + dcn3_2_soc.dram_channel_width_bytes * ((float)dcn3_2_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100)); + } else if (entry->dram_speed_mts > 0) { + float bw_on_dram = entry->dram_speed_mts * dcn3_2_soc.num_chans * + dcn3_2_soc.dram_channel_width_bytes * ((float)dcn3_2_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100); + + entry->fabricclk_mhz = bw_on_dram / (dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_fabric_bw_after_urgent / 100)); + entry->dcfclk_mhz = bw_on_dram / (dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_sdp_bw_after_urgent / 100)); + } +} + +void insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table, + unsigned int *num_entries, + struct _vcs_dpi_voltage_scaling_st *entry) +{ + int i = 0; + int index = 0; + float net_bw_of_new_state = 0; + + dc_assert_fp_enabled(); + + get_optimal_ntuple(entry); + + if (*num_entries == 0) { + table[0] = *entry; + (*num_entries)++; + } else { + net_bw_of_new_state = calculate_net_bw_in_kbytes_sec(entry); + while (net_bw_of_new_state > calculate_net_bw_in_kbytes_sec(&table[index])) { + index++; + if (index >= *num_entries) + break; + } + + for (i = *num_entries; i > index; i--) + table[i] = table[i - 1]; + + table[index] = *entry; + (*num_entries)++; + } +} + +/** + * dcn32_set_phantom_stream_timing - Set timing params for the phantom stream + * @dc: current dc state + * @context: new dc state + * @ref_pipe: Main pipe for the phantom stream + * @phantom_stream: target phantom stream state + * @pipes: DML pipe params + * @pipe_cnt: number of DML pipes + * @dc_pipe_idx: DC pipe index for the main pipe (i.e. ref_pipe) + * + * Set timing params of the phantom stream based on calculated output from DML. + * This function first gets the DML pipe index using the DC pipe index, then + * calls into DML (get_subviewport_lines_needed_in_mall) to get the number of + * lines required for SubVP MCLK switching and assigns to the phantom stream + * accordingly. + * + * - The number of SubVP lines calculated in DML does not take into account + * FW processing delays and required pstate allow width, so we must include + * that separately. + * + * - Set phantom backporch = vstartup of main pipe + */ +void dcn32_set_phantom_stream_timing(struct dc *dc, + struct dc_state *context, + struct pipe_ctx *ref_pipe, + struct dc_stream_state *phantom_stream, + display_e2e_pipe_params_st *pipes, + unsigned int pipe_cnt, + unsigned int dc_pipe_idx) +{ + unsigned int i, pipe_idx; + struct pipe_ctx *pipe; + uint32_t phantom_vactive, phantom_bp, pstate_width_fw_delay_lines; + unsigned int num_dpp; + unsigned int vlevel = context->bw_ctx.dml.vba.VoltageLevel; + unsigned int dcfclk = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb]; + unsigned int socclk = context->bw_ctx.dml.vba.SOCCLKPerState[vlevel]; + struct vba_vars_st *vba = &context->bw_ctx.dml.vba; + + dc_assert_fp_enabled(); + + // Find DML pipe index (pipe_idx) using dc_pipe_idx + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + pipe = &context->res_ctx.pipe_ctx[i]; + + if (!pipe->stream) + continue; + + if (i == dc_pipe_idx) + break; + + pipe_idx++; + } + + // Calculate lines required for pstate allow width and FW processing delays + pstate_width_fw_delay_lines = ((double)(dc->caps.subvp_fw_processing_delay_us + + dc->caps.subvp_pstate_allow_width_us) / 1000000) * + (ref_pipe->stream->timing.pix_clk_100hz * 100) / + (double)ref_pipe->stream->timing.h_total; + + // Update clks_cfg for calling into recalculate + pipes[0].clks_cfg.voltage = vlevel; + pipes[0].clks_cfg.dcfclk_mhz = dcfclk; + pipes[0].clks_cfg.socclk_mhz = socclk; + + // DML calculation for MALL region doesn't take into account FW delay + // and required pstate allow width for multi-display cases + /* Add 16 lines margin to the MALL REGION because SUB_VP_START_LINE must be aligned + * to 2 swaths (i.e. 16 lines) + */ + phantom_vactive = get_subviewport_lines_needed_in_mall(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx) + + pstate_width_fw_delay_lines + dc->caps.subvp_swath_height_margin_lines; + + // W/A for DCC corruption with certain high resolution timings. + // Determing if pipesplit is used. If so, add meta_row_height to the phantom vactive. + num_dpp = vba->NoOfDPP[vba->VoltageLevel][vba->maxMpcComb][vba->pipe_plane[pipe_idx]]; + phantom_vactive += num_dpp > 1 ? vba->meta_row_height[vba->pipe_plane[pipe_idx]] : 0; + + // For backporch of phantom pipe, use vstartup of the main pipe + phantom_bp = get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); + + phantom_stream->dst.y = 0; + phantom_stream->dst.height = phantom_vactive; + phantom_stream->src.y = 0; + phantom_stream->src.height = phantom_vactive; + + phantom_stream->timing.v_addressable = phantom_vactive; + phantom_stream->timing.v_front_porch = 1; + phantom_stream->timing.v_total = phantom_stream->timing.v_addressable + + phantom_stream->timing.v_front_porch + + phantom_stream->timing.v_sync_width + + phantom_bp; + phantom_stream->timing.flags.DSC = 0; // Don't need DSC for phantom timing +} + +/** + * dcn32_get_num_free_pipes - Calculate number of free pipes + * @dc: current dc state + * @context: new dc state + * + * This function assumes that a "used" pipe is a pipe that has + * both a stream and a plane assigned to it. + * + * Return: Number of free pipes available in the context + */ +static unsigned int dcn32_get_num_free_pipes(struct dc *dc, struct dc_state *context) +{ + unsigned int i; + unsigned int free_pipes = 0; + unsigned int num_pipes = 0; + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + if (pipe->stream && !pipe->top_pipe) { + while (pipe) { + num_pipes++; + pipe = pipe->bottom_pipe; + } + } + } + + free_pipes = dc->res_pool->pipe_count - num_pipes; + return free_pipes; +} + +/** + * dcn32_assign_subvp_pipe - Function to decide which pipe will use Sub-VP. + * @dc: current dc state + * @context: new dc state + * @index: [out] dc pipe index for the pipe chosen to have phantom pipes assigned + * + * We enter this function if we are Sub-VP capable (i.e. enough pipes available) + * and regular P-State switching (i.e. VACTIVE/VBLANK) is not supported, or if + * we are forcing SubVP P-State switching on the current config. + * + * The number of pipes used for the chosen surface must be less than or equal to the + * number of free pipes available. + * + * In general we choose surfaces with the longest frame time first (better for SubVP + VBLANK). + * For multi-display cases the ActiveDRAMClockChangeMargin doesn't provide enough info on its own + * for determining which should be the SubVP pipe (need a way to determine if a pipe / plane doesn't + * support MCLK switching naturally [i.e. ACTIVE or VBLANK]). + * + * Return: True if a valid pipe assignment was found for Sub-VP. Otherwise false. + */ +static bool dcn32_assign_subvp_pipe(struct dc *dc, + struct dc_state *context, + unsigned int *index) +{ + unsigned int i, pipe_idx; + unsigned int max_frame_time = 0; + bool valid_assignment_found = false; + unsigned int free_pipes = dcn32_get_num_free_pipes(dc, context); + bool current_assignment_freesync = false; + struct vba_vars_st *vba = &context->bw_ctx.dml.vba; + + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + unsigned int num_pipes = 0; + unsigned int refresh_rate = 0; + + if (!pipe->stream) + continue; + + // Round up + refresh_rate = (pipe->stream->timing.pix_clk_100hz * 100 + + pipe->stream->timing.v_total * pipe->stream->timing.h_total - 1) + / (double)(pipe->stream->timing.v_total * pipe->stream->timing.h_total); + /* SubVP pipe candidate requirements: + * - Refresh rate < 120hz + * - Not able to switch in vactive naturally (switching in active means the + * DET provides enough buffer to hide the P-State switch latency -- trying + * to combine this with SubVP can cause issues with the scheduling). + * - Not TMZ surface + */ + if (pipe->plane_state && !pipe->top_pipe && + pipe->stream->mall_stream_config.type == SUBVP_NONE && refresh_rate < 120 && !pipe->plane_state->address.tmz_surface && + (vba->ActiveDRAMClockChangeLatencyMarginPerState[vba->VoltageLevel][vba->maxMpcComb][vba->pipe_plane[pipe_idx]] <= 0 || + (vba->ActiveDRAMClockChangeLatencyMarginPerState[vba->VoltageLevel][vba->maxMpcComb][vba->pipe_plane[pipe_idx]] > 0 && + dcn32_allow_subvp_with_active_margin(pipe)))) { + while (pipe) { + num_pipes++; + pipe = pipe->bottom_pipe; + } + + pipe = &context->res_ctx.pipe_ctx[i]; + if (num_pipes <= free_pipes) { + struct dc_stream_state *stream = pipe->stream; + unsigned int frame_us = (stream->timing.v_total * stream->timing.h_total / + (double)(stream->timing.pix_clk_100hz * 100)) * 1000000; + if (frame_us > max_frame_time && !stream->ignore_msa_timing_param) { + *index = i; + max_frame_time = frame_us; + valid_assignment_found = true; + current_assignment_freesync = false; + /* For the 2-Freesync display case, still choose the one with the + * longest frame time + */ + } else if (stream->ignore_msa_timing_param && (!valid_assignment_found || + (current_assignment_freesync && frame_us > max_frame_time))) { + *index = i; + valid_assignment_found = true; + current_assignment_freesync = true; + } + } + } + pipe_idx++; + } + return valid_assignment_found; +} + +/** + * dcn32_enough_pipes_for_subvp - Function to check if there are "enough" pipes for SubVP. + * @dc: current dc state + * @context: new dc state + * + * This function returns true if there are enough free pipes + * to create the required phantom pipes for any given stream + * (that does not already have phantom pipe assigned). + * + * e.g. For a 2 stream config where the first stream uses one + * pipe and the second stream uses 2 pipes (i.e. pipe split), + * this function will return true because there is 1 remaining + * pipe which can be used as the phantom pipe for the non pipe + * split pipe. + * + * Return: + * True if there are enough free pipes to assign phantom pipes to at least one + * stream that does not already have phantom pipes assigned. Otherwise false. + */ +static bool dcn32_enough_pipes_for_subvp(struct dc *dc, struct dc_state *context) +{ + unsigned int i, split_cnt, free_pipes; + unsigned int min_pipe_split = dc->res_pool->pipe_count + 1; // init as max number of pipes + 1 + bool subvp_possible = false; + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + // Find the minimum pipe split count for non SubVP pipes + if (pipe->stream && !pipe->top_pipe && + pipe->stream->mall_stream_config.type == SUBVP_NONE) { + split_cnt = 0; + while (pipe) { + split_cnt++; + pipe = pipe->bottom_pipe; + } + + if (split_cnt < min_pipe_split) + min_pipe_split = split_cnt; + } + } + + free_pipes = dcn32_get_num_free_pipes(dc, context); + + // SubVP only possible if at least one pipe is being used (i.e. free_pipes + // should not equal to the pipe_count) + if (free_pipes >= min_pipe_split && free_pipes < dc->res_pool->pipe_count) + subvp_possible = true; + + return subvp_possible; +} + +/** + * subvp_subvp_schedulable - Determine if SubVP + SubVP config is schedulable + * @dc: current dc state + * @context: new dc state + * + * High level algorithm: + * 1. Find longest microschedule length (in us) between the two SubVP pipes + * 2. Check if the worst case overlap (VBLANK in middle of ACTIVE) for both + * pipes still allows for the maximum microschedule to fit in the active + * region for both pipes. + * + * Return: True if the SubVP + SubVP config is schedulable, false otherwise + */ +static bool subvp_subvp_schedulable(struct dc *dc, struct dc_state *context) +{ + struct pipe_ctx *subvp_pipes[2]; + struct dc_stream_state *phantom = NULL; + uint32_t microschedule_lines = 0; + uint32_t index = 0; + uint32_t i; + uint32_t max_microschedule_us = 0; + int32_t vactive1_us, vactive2_us, vblank1_us, vblank2_us; + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + uint32_t time_us = 0; + + /* Loop to calculate the maximum microschedule time between the two SubVP pipes, + * and also to store the two main SubVP pipe pointers in subvp_pipes[2]. + */ + if (pipe->stream && pipe->plane_state && !pipe->top_pipe && + pipe->stream->mall_stream_config.type == SUBVP_MAIN) { + phantom = pipe->stream->mall_stream_config.paired_stream; + microschedule_lines = (phantom->timing.v_total - phantom->timing.v_front_porch) + + phantom->timing.v_addressable; + + // Round up when calculating microschedule time (+ 1 at the end) + time_us = (microschedule_lines * phantom->timing.h_total) / + (double)(phantom->timing.pix_clk_100hz * 100) * 1000000 + + dc->caps.subvp_prefetch_end_to_mall_start_us + + dc->caps.subvp_fw_processing_delay_us + 1; + if (time_us > max_microschedule_us) + max_microschedule_us = time_us; + + subvp_pipes[index] = pipe; + index++; + + // Maximum 2 SubVP pipes + if (index == 2) + break; + } + } + vactive1_us = ((subvp_pipes[0]->stream->timing.v_addressable * subvp_pipes[0]->stream->timing.h_total) / + (double)(subvp_pipes[0]->stream->timing.pix_clk_100hz * 100)) * 1000000; + vactive2_us = ((subvp_pipes[1]->stream->timing.v_addressable * subvp_pipes[1]->stream->timing.h_total) / + (double)(subvp_pipes[1]->stream->timing.pix_clk_100hz * 100)) * 1000000; + vblank1_us = (((subvp_pipes[0]->stream->timing.v_total - subvp_pipes[0]->stream->timing.v_addressable) * + subvp_pipes[0]->stream->timing.h_total) / + (double)(subvp_pipes[0]->stream->timing.pix_clk_100hz * 100)) * 1000000; + vblank2_us = (((subvp_pipes[1]->stream->timing.v_total - subvp_pipes[1]->stream->timing.v_addressable) * + subvp_pipes[1]->stream->timing.h_total) / + (double)(subvp_pipes[1]->stream->timing.pix_clk_100hz * 100)) * 1000000; + + if ((vactive1_us - vblank2_us) / 2 > max_microschedule_us && + (vactive2_us - vblank1_us) / 2 > max_microschedule_us) + return true; + + return false; +} + +/** + * subvp_drr_schedulable - Determine if SubVP + DRR config is schedulable + * @dc: current dc state + * @context: new dc state + * @drr_pipe: DRR pipe_ctx for the SubVP + DRR config + * + * High level algorithm: + * 1. Get timing for SubVP pipe, phantom pipe, and DRR pipe + * 2. Determine the frame time for the DRR display when adding required margin for MCLK switching + * (the margin is equal to the MALL region + DRR margin (500us)) + * 3.If (SubVP Active - Prefetch > Stretched DRR frame + max(MALL region, Stretched DRR frame)) + * then report the configuration as supported + * + * Return: True if the SubVP + DRR config is schedulable, false otherwise + */ +static bool subvp_drr_schedulable(struct dc *dc, struct dc_state *context, struct pipe_ctx *drr_pipe) +{ + bool schedulable = false; + uint32_t i; + struct pipe_ctx *pipe = NULL; + struct dc_crtc_timing *main_timing = NULL; + struct dc_crtc_timing *phantom_timing = NULL; + struct dc_crtc_timing *drr_timing = NULL; + int16_t prefetch_us = 0; + int16_t mall_region_us = 0; + int16_t drr_frame_us = 0; // nominal frame time + int16_t subvp_active_us = 0; + int16_t stretched_drr_us = 0; + int16_t drr_stretched_vblank_us = 0; + int16_t max_vblank_mallregion = 0; + + // Find SubVP pipe + for (i = 0; i < dc->res_pool->pipe_count; i++) { + pipe = &context->res_ctx.pipe_ctx[i]; + + // We check for master pipe, but it shouldn't matter since we only need + // the pipe for timing info (stream should be same for any pipe splits) + if (!pipe->stream || !pipe->plane_state || pipe->top_pipe || pipe->prev_odm_pipe) + continue; + + // Find the SubVP pipe + if (pipe->stream->mall_stream_config.type == SUBVP_MAIN) + break; + } + + main_timing = &pipe->stream->timing; + phantom_timing = &pipe->stream->mall_stream_config.paired_stream->timing; + drr_timing = &drr_pipe->stream->timing; + prefetch_us = (phantom_timing->v_total - phantom_timing->v_front_porch) * phantom_timing->h_total / + (double)(phantom_timing->pix_clk_100hz * 100) * 1000000 + + dc->caps.subvp_prefetch_end_to_mall_start_us; + subvp_active_us = main_timing->v_addressable * main_timing->h_total / + (double)(main_timing->pix_clk_100hz * 100) * 1000000; + drr_frame_us = drr_timing->v_total * drr_timing->h_total / + (double)(drr_timing->pix_clk_100hz * 100) * 1000000; + // P-State allow width and FW delays already included phantom_timing->v_addressable + mall_region_us = phantom_timing->v_addressable * phantom_timing->h_total / + (double)(phantom_timing->pix_clk_100hz * 100) * 1000000; + stretched_drr_us = drr_frame_us + mall_region_us + SUBVP_DRR_MARGIN_US; + drr_stretched_vblank_us = (drr_timing->v_total - drr_timing->v_addressable) * drr_timing->h_total / + (double)(drr_timing->pix_clk_100hz * 100) * 1000000 + (stretched_drr_us - drr_frame_us); + max_vblank_mallregion = drr_stretched_vblank_us > mall_region_us ? drr_stretched_vblank_us : mall_region_us; + + /* We consider SubVP + DRR schedulable if the stretched frame duration of the DRR display (i.e. the + * highest refresh rate + margin that can support UCLK P-State switch) passes the static analysis + * for VBLANK: (VACTIVE region of the SubVP pipe can fit the MALL prefetch, VBLANK frame time, + * and the max of (VBLANK blanking time, MALL region)). + */ + if (stretched_drr_us < (1 / (double)drr_timing->min_refresh_in_uhz) * 1000000 * 1000000 && + subvp_active_us - prefetch_us - stretched_drr_us - max_vblank_mallregion > 0) + schedulable = true; + + return schedulable; +} + + +/** + * subvp_vblank_schedulable - Determine if SubVP + VBLANK config is schedulable + * @dc: current dc state + * @context: new dc state + * + * High level algorithm: + * 1. Get timing for SubVP pipe, phantom pipe, and VBLANK pipe + * 2. If (SubVP Active - Prefetch > Vblank Frame Time + max(MALL region, Vblank blanking time)) + * then report the configuration as supported + * 3. If the VBLANK display is DRR, then take the DRR static schedulability path + * + * Return: True if the SubVP + VBLANK/DRR config is schedulable, false otherwise + */ +static bool subvp_vblank_schedulable(struct dc *dc, struct dc_state *context) +{ + struct pipe_ctx *pipe = NULL; + struct pipe_ctx *subvp_pipe = NULL; + bool found = false; + bool schedulable = false; + uint32_t i = 0; + uint8_t vblank_index = 0; + uint16_t prefetch_us = 0; + uint16_t mall_region_us = 0; + uint16_t vblank_frame_us = 0; + uint16_t subvp_active_us = 0; + uint16_t vblank_blank_us = 0; + uint16_t max_vblank_mallregion = 0; + struct dc_crtc_timing *main_timing = NULL; + struct dc_crtc_timing *phantom_timing = NULL; + struct dc_crtc_timing *vblank_timing = NULL; + + /* For SubVP + VBLANK/DRR cases, we assume there can only be + * a single VBLANK/DRR display. If DML outputs SubVP + VBLANK + * is supported, it is either a single VBLANK case or two VBLANK + * displays which are synchronized (in which case they have identical + * timings). + */ + for (i = 0; i < dc->res_pool->pipe_count; i++) { + pipe = &context->res_ctx.pipe_ctx[i]; + + // We check for master pipe, but it shouldn't matter since we only need + // the pipe for timing info (stream should be same for any pipe splits) + if (!pipe->stream || !pipe->plane_state || pipe->top_pipe || pipe->prev_odm_pipe) + continue; + + if (!found && pipe->stream->mall_stream_config.type == SUBVP_NONE) { + // Found pipe which is not SubVP or Phantom (i.e. the VBLANK pipe). + vblank_index = i; + found = true; + } + + if (!subvp_pipe && pipe->stream->mall_stream_config.type == SUBVP_MAIN) + subvp_pipe = pipe; + } + // Use ignore_msa_timing_param flag to identify as DRR + if (found && context->res_ctx.pipe_ctx[vblank_index].stream->ignore_msa_timing_param) { + // SUBVP + DRR case + schedulable = subvp_drr_schedulable(dc, context, &context->res_ctx.pipe_ctx[vblank_index]); + } else if (found) { + main_timing = &subvp_pipe->stream->timing; + phantom_timing = &subvp_pipe->stream->mall_stream_config.paired_stream->timing; + vblank_timing = &context->res_ctx.pipe_ctx[vblank_index].stream->timing; + // Prefetch time is equal to VACTIVE + BP + VSYNC of the phantom pipe + // Also include the prefetch end to mallstart delay time + prefetch_us = (phantom_timing->v_total - phantom_timing->v_front_porch) * phantom_timing->h_total / + (double)(phantom_timing->pix_clk_100hz * 100) * 1000000 + + dc->caps.subvp_prefetch_end_to_mall_start_us; + // P-State allow width and FW delays already included phantom_timing->v_addressable + mall_region_us = phantom_timing->v_addressable * phantom_timing->h_total / + (double)(phantom_timing->pix_clk_100hz * 100) * 1000000; + vblank_frame_us = vblank_timing->v_total * vblank_timing->h_total / + (double)(vblank_timing->pix_clk_100hz * 100) * 1000000; + vblank_blank_us = (vblank_timing->v_total - vblank_timing->v_addressable) * vblank_timing->h_total / + (double)(vblank_timing->pix_clk_100hz * 100) * 1000000; + subvp_active_us = main_timing->v_addressable * main_timing->h_total / + (double)(main_timing->pix_clk_100hz * 100) * 1000000; + max_vblank_mallregion = vblank_blank_us > mall_region_us ? vblank_blank_us : mall_region_us; + + // Schedulable if VACTIVE region of the SubVP pipe can fit the MALL prefetch, VBLANK frame time, + // and the max of (VBLANK blanking time, MALL region) + // TODO: Possibly add some margin (i.e. the below conditions should be [...] > X instead of [...] > 0) + if (subvp_active_us - prefetch_us - vblank_frame_us - max_vblank_mallregion > 0) + schedulable = true; + } + return schedulable; +} + +/** + * subvp_validate_static_schedulability - Check which SubVP case is calculated + * and handle static analysis based on the case. + * @dc: current dc state + * @context: new dc state + * @vlevel: Voltage level calculated by DML + * + * Three cases: + * 1. SubVP + SubVP + * 2. SubVP + VBLANK (DRR checked internally) + * 3. SubVP + VACTIVE (currently unsupported) + * + * Return: True if statically schedulable, false otherwise + */ +static bool subvp_validate_static_schedulability(struct dc *dc, + struct dc_state *context, + int vlevel) +{ + bool schedulable = true; // true by default for single display case + struct vba_vars_st *vba = &context->bw_ctx.dml.vba; + uint32_t i, pipe_idx; + uint8_t subvp_count = 0; + uint8_t vactive_count = 0; + + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + if (!pipe->stream) + continue; + + if (pipe->plane_state && !pipe->top_pipe && + pipe->stream->mall_stream_config.type == SUBVP_MAIN) + subvp_count++; + + // Count how many planes that aren't SubVP/phantom are capable of VACTIVE + // switching (SubVP + VACTIVE unsupported). In situations where we force + // SubVP for a VACTIVE plane, we don't want to increment the vactive_count. + if (vba->ActiveDRAMClockChangeLatencyMargin[vba->pipe_plane[pipe_idx]] > 0 && + pipe->stream->mall_stream_config.type == SUBVP_NONE) { + vactive_count++; + } + pipe_idx++; + } + + if (subvp_count == 2) { + // Static schedulability check for SubVP + SubVP case + schedulable = subvp_subvp_schedulable(dc, context); + } else if (vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_vblank_w_mall_sub_vp) { + // Static schedulability check for SubVP + VBLANK case. Also handle the case where + // DML outputs SubVP + VBLANK + VACTIVE (DML will report as SubVP + VBLANK) + if (vactive_count > 0) + schedulable = false; + else + schedulable = subvp_vblank_schedulable(dc, context); + } else if (vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_vactive_w_mall_sub_vp && + vactive_count > 0) { + // For single display SubVP cases, DML will output dm_dram_clock_change_vactive_w_mall_sub_vp by default. + // We tell the difference between SubVP vs. SubVP + VACTIVE by checking the vactive_count. + // SubVP + VACTIVE currently unsupported + schedulable = false; + } + return schedulable; +} + +static void dcn32_full_validate_bw_helper(struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int *vlevel, + int *split, + bool *merge, + int *pipe_cnt) +{ + struct vba_vars_st *vba = &context->bw_ctx.dml.vba; + unsigned int dc_pipe_idx = 0; + bool found_supported_config = false; + struct pipe_ctx *pipe = NULL; + uint32_t non_subvp_pipes = 0; + bool drr_pipe_found = false; + uint32_t drr_pipe_index = 0; + uint32_t i = 0; + + dc_assert_fp_enabled(); + + /* + * DML favors voltage over p-state, but we're more interested in + * supporting p-state over voltage. We can't support p-state in + * prefetch mode > 0 so try capping the prefetch mode to start. + * Override present for testing. + */ + if (dc->debug.dml_disallow_alternate_prefetch_modes) + context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final = + dm_prefetch_support_uclk_fclk_and_stutter; + else + context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final = + dm_prefetch_support_uclk_fclk_and_stutter_if_possible; + + *vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, *pipe_cnt); + /* This may adjust vlevel and maxMpcComb */ + if (*vlevel < context->bw_ctx.dml.soc.num_states) { + *vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, *vlevel, split, merge); + vba->VoltageLevel = *vlevel; + } + + /* Conditions for setting up phantom pipes for SubVP: + * 1. Not force disable SubVP + * 2. Full update (i.e. !fast_validate) + * 3. Enough pipes are available to support SubVP (TODO: Which pipes will use VACTIVE / VBLANK / SUBVP?) + * 4. Display configuration passes validation + * 5. (Config doesn't support MCLK in VACTIVE/VBLANK || dc->debug.force_subvp_mclk_switch) + */ + if (!dc->debug.force_disable_subvp && dcn32_all_pipes_have_stream_and_plane(dc, context) && + !dcn32_mpo_in_use(context) && !dcn32_any_surfaces_rotated(dc, context) && + (*vlevel == context->bw_ctx.dml.soc.num_states || + vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported || + dc->debug.force_subvp_mclk_switch)) { + + dcn32_merge_pipes_for_subvp(dc, context); + memset(merge, 0, MAX_PIPES * sizeof(bool)); + + /* to re-initialize viewport after the pipe merge */ + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; + + if (!pipe_ctx->plane_state || !pipe_ctx->stream) + continue; + + resource_build_scaling_params(pipe_ctx); + } + + while (!found_supported_config && dcn32_enough_pipes_for_subvp(dc, context) && + dcn32_assign_subvp_pipe(dc, context, &dc_pipe_idx)) { + /* For the case where *vlevel = num_states, bandwidth validation has failed for this config. + * Adding phantom pipes won't change the validation result, so change the DML input param + * for P-State support before adding phantom pipes and recalculating the DML result. + * However, this case is only applicable for SubVP + DRR cases because the prefetch mode + * will not allow for switch in VBLANK. The DRR display must have it's VBLANK stretched + * enough to support MCLK switching. + */ + if (*vlevel == context->bw_ctx.dml.soc.num_states && + context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final == + dm_prefetch_support_uclk_fclk_and_stutter) { + context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final = + dm_prefetch_support_stutter; + /* There are params (such as FabricClock) that need to be recalculated + * after validation fails (otherwise it will be 0). Calculation for + * phantom vactive requires call into DML, so we must ensure all the + * vba params are valid otherwise we'll get incorrect phantom vactive. + */ + *vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, *pipe_cnt); + } + + dc->res_pool->funcs->add_phantom_pipes(dc, context, pipes, *pipe_cnt, dc_pipe_idx); + + *pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, false); + // Populate dppclk to trigger a recalculate in dml_get_voltage_level + // so the phantom pipe DLG params can be assigned correctly. + pipes[0].clks_cfg.dppclk_mhz = get_dppclk_calculated(&context->bw_ctx.dml, pipes, *pipe_cnt, 0); + *vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, *pipe_cnt); + + if (*vlevel < context->bw_ctx.dml.soc.num_states && + vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] != dm_dram_clock_change_unsupported + && subvp_validate_static_schedulability(dc, context, *vlevel)) { + found_supported_config = true; + } else if (*vlevel < context->bw_ctx.dml.soc.num_states && + vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported) { + /* Case where 1 SubVP is added, and DML reports MCLK unsupported. This handles + * the case for SubVP + DRR, where the DRR display does not support MCLK switch + * at it's native refresh rate / timing. + */ + for (i = 0; i < dc->res_pool->pipe_count; i++) { + pipe = &context->res_ctx.pipe_ctx[i]; + if (pipe->stream && pipe->plane_state && !pipe->top_pipe && + pipe->stream->mall_stream_config.type == SUBVP_NONE) { + non_subvp_pipes++; + // Use ignore_msa_timing_param flag to identify as DRR + if (pipe->stream->ignore_msa_timing_param) { + drr_pipe_found = true; + drr_pipe_index = i; + } + } + } + // If there is only 1 remaining non SubVP pipe that is DRR, check static + // schedulability for SubVP + DRR. + if (non_subvp_pipes == 1 && drr_pipe_found) { + found_supported_config = subvp_drr_schedulable(dc, context, + &context->res_ctx.pipe_ctx[drr_pipe_index]); + } + } + } + + // If SubVP pipe config is unsupported (or cannot be used for UCLK switching) + // remove phantom pipes and repopulate dml pipes + if (!found_supported_config) { + dc->res_pool->funcs->remove_phantom_pipes(dc, context); + vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] = dm_dram_clock_change_unsupported; + *pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, false); + + *vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, *pipe_cnt); + /* This may adjust vlevel and maxMpcComb */ + if (*vlevel < context->bw_ctx.dml.soc.num_states) { + *vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, *vlevel, split, merge); + vba->VoltageLevel = *vlevel; + } + } else { + // Most populate phantom DLG params before programming hardware / timing for phantom pipe + dcn32_helper_populate_phantom_dlg_params(dc, context, pipes, *pipe_cnt); + + /* Call validate_apply_pipe_split flags after calling DML getters for + * phantom dlg params, or some of the VBA params indicating pipe split + * can be overwritten by the getters. + * + * When setting up SubVP config, all pipes are merged before attempting to + * add phantom pipes. If pipe split (ODM / MPC) is required, both the main + * and phantom pipes will be split in the regular pipe splitting sequence. + */ + memset(split, 0, MAX_PIPES * sizeof(int)); + memset(merge, 0, MAX_PIPES * sizeof(bool)); + *vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, *vlevel, split, merge); + vba->VoltageLevel = *vlevel; + // Note: We can't apply the phantom pipes to hardware at this time. We have to wait + // until driver has acquired the DMCUB lock to do it safely. + } + } +} + +static bool is_dtbclk_required(struct dc *dc, struct dc_state *context) +{ + int i; + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + if (!context->res_ctx.pipe_ctx[i].stream) + continue; + if (is_dp_128b_132b_signal(&context->res_ctx.pipe_ctx[i])) + return true; + } + return false; +} + +static void dcn32_calculate_dlg_params(struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt, int vlevel) +{ + int i, pipe_idx, active_hubp_count = 0; + bool usr_retraining_support = false; + bool unbounded_req_enabled = false; + + dc_assert_fp_enabled(); + + /* Writeback MCIF_WB arbitration parameters */ + dc->res_pool->funcs->set_mcif_arb_params(dc, context, pipes, pipe_cnt); + + context->bw_ctx.bw.dcn.clk.dispclk_khz = context->bw_ctx.dml.vba.DISPCLK * 1000; + context->bw_ctx.bw.dcn.clk.dcfclk_khz = context->bw_ctx.dml.vba.DCFCLK * 1000; + context->bw_ctx.bw.dcn.clk.socclk_khz = context->bw_ctx.dml.vba.SOCCLK * 1000; + context->bw_ctx.bw.dcn.clk.dramclk_khz = context->bw_ctx.dml.vba.DRAMSpeed * 1000 / 16; + context->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz = context->bw_ctx.dml.vba.DCFCLKDeepSleep * 1000; + context->bw_ctx.bw.dcn.clk.fclk_khz = context->bw_ctx.dml.vba.FabricClock * 1000; + context->bw_ctx.bw.dcn.clk.p_state_change_support = + context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] + != dm_dram_clock_change_unsupported; + context->bw_ctx.bw.dcn.clk.num_ways = dcn32_helper_calculate_num_ways_for_subvp(dc, context); + + context->bw_ctx.bw.dcn.clk.dppclk_khz = 0; + context->bw_ctx.bw.dcn.clk.dtbclk_en = is_dtbclk_required(dc, context); + context->bw_ctx.bw.dcn.clk.ref_dtbclk_khz = context->bw_ctx.dml.vba.DTBCLKPerState[vlevel] * 1000; + if (context->bw_ctx.dml.vba.FCLKChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] == dm_fclock_change_unsupported) + context->bw_ctx.bw.dcn.clk.fclk_p_state_change_support = false; + else + context->bw_ctx.bw.dcn.clk.fclk_p_state_change_support = true; + + usr_retraining_support = context->bw_ctx.dml.vba.USRRetrainingSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb]; + ASSERT(usr_retraining_support); + + if (context->bw_ctx.bw.dcn.clk.dispclk_khz < dc->debug.min_disp_clk_khz) + context->bw_ctx.bw.dcn.clk.dispclk_khz = dc->debug.min_disp_clk_khz; + + unbounded_req_enabled = get_unbounded_request_enabled(&context->bw_ctx.dml, pipes, pipe_cnt); + + if (unbounded_req_enabled && pipe_cnt > 1) { + // Unbounded requesting should not ever be used when more than 1 pipe is enabled. + ASSERT(false); + unbounded_req_enabled = false; + } + + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + if (!context->res_ctx.pipe_ctx[i].stream) + continue; + if (context->res_ctx.pipe_ctx[i].plane_state) + active_hubp_count++; + pipes[pipe_idx].pipe.dest.vstartup_start = get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt, + pipe_idx); + pipes[pipe_idx].pipe.dest.vupdate_offset = get_vupdate_offset(&context->bw_ctx.dml, pipes, pipe_cnt, + pipe_idx); + pipes[pipe_idx].pipe.dest.vupdate_width = get_vupdate_width(&context->bw_ctx.dml, pipes, pipe_cnt, + pipe_idx); + pipes[pipe_idx].pipe.dest.vready_offset = get_vready_offset(&context->bw_ctx.dml, pipes, pipe_cnt, + pipe_idx); + + if (context->res_ctx.pipe_ctx[i].stream->mall_stream_config.type == SUBVP_PHANTOM) { + // Phantom pipe requires that DET_SIZE = 0 and no unbounded requests + context->res_ctx.pipe_ctx[i].det_buffer_size_kb = 0; + context->res_ctx.pipe_ctx[i].unbounded_req = false; + } else { + context->res_ctx.pipe_ctx[i].det_buffer_size_kb = get_det_buffer_size_kbytes(&context->bw_ctx.dml, pipes, pipe_cnt, + pipe_idx); + context->res_ctx.pipe_ctx[i].unbounded_req = unbounded_req_enabled; + } + + if (context->bw_ctx.bw.dcn.clk.dppclk_khz < pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000) + context->bw_ctx.bw.dcn.clk.dppclk_khz = pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000; + if (context->res_ctx.pipe_ctx[i].plane_state) + context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz = pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000; + else + context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz = 0; + context->res_ctx.pipe_ctx[i].pipe_dlg_param = pipes[pipe_idx].pipe.dest; + pipe_idx++; + } + /* If DCN isn't making memory requests we can allow pstate change and lower clocks */ + if (!active_hubp_count) { + context->bw_ctx.bw.dcn.clk.socclk_khz = 0; + context->bw_ctx.bw.dcn.clk.dppclk_khz = 0; + context->bw_ctx.bw.dcn.clk.dcfclk_khz = 0; + context->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz = 0; + context->bw_ctx.bw.dcn.clk.dramclk_khz = 0; + context->bw_ctx.bw.dcn.clk.fclk_khz = 0; + context->bw_ctx.bw.dcn.clk.p_state_change_support = true; + } + /*save a original dppclock copy*/ + context->bw_ctx.bw.dcn.clk.bw_dppclk_khz = context->bw_ctx.bw.dcn.clk.dppclk_khz; + context->bw_ctx.bw.dcn.clk.bw_dispclk_khz = context->bw_ctx.bw.dcn.clk.dispclk_khz; + context->bw_ctx.bw.dcn.clk.max_supported_dppclk_khz = context->bw_ctx.dml.soc.clock_limits[vlevel].dppclk_mhz + * 1000; + context->bw_ctx.bw.dcn.clk.max_supported_dispclk_khz = context->bw_ctx.dml.soc.clock_limits[vlevel].dispclk_mhz + * 1000; + + context->bw_ctx.bw.dcn.compbuf_size_kb = context->bw_ctx.dml.ip.config_return_buffer_size_in_kbytes; + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + if (context->res_ctx.pipe_ctx[i].stream) + context->bw_ctx.bw.dcn.compbuf_size_kb -= context->res_ctx.pipe_ctx[i].det_buffer_size_kb; + } + + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + + if (!context->res_ctx.pipe_ctx[i].stream) + continue; + + context->bw_ctx.dml.funcs.rq_dlg_get_dlg_reg_v2(&context->bw_ctx.dml, + &context->res_ctx.pipe_ctx[i].dlg_regs, &context->res_ctx.pipe_ctx[i].ttu_regs, pipes, + pipe_cnt, pipe_idx); + + context->bw_ctx.dml.funcs.rq_dlg_get_rq_reg_v2(&context->res_ctx.pipe_ctx[i].rq_regs, + &context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); + pipe_idx++; + } +} + +static struct pipe_ctx *dcn32_find_split_pipe( + struct dc *dc, + struct dc_state *context, + int old_index) +{ + struct pipe_ctx *pipe = NULL; + int i; + + if (old_index >= 0 && context->res_ctx.pipe_ctx[old_index].stream == NULL) { + pipe = &context->res_ctx.pipe_ctx[old_index]; + pipe->pipe_idx = old_index; + } + + if (!pipe) + for (i = dc->res_pool->pipe_count - 1; i >= 0; i--) { + if (dc->current_state->res_ctx.pipe_ctx[i].top_pipe == NULL + && dc->current_state->res_ctx.pipe_ctx[i].prev_odm_pipe == NULL) { + if (context->res_ctx.pipe_ctx[i].stream == NULL) { + pipe = &context->res_ctx.pipe_ctx[i]; + pipe->pipe_idx = i; + break; + } + } + } + + /* + * May need to fix pipes getting tossed from 1 opp to another on flip + * Add for debugging transient underflow during topology updates: + * ASSERT(pipe); + */ + if (!pipe) + for (i = dc->res_pool->pipe_count - 1; i >= 0; i--) { + if (context->res_ctx.pipe_ctx[i].stream == NULL) { + pipe = &context->res_ctx.pipe_ctx[i]; + pipe->pipe_idx = i; + break; + } + } + + return pipe; +} + +static bool dcn32_split_stream_for_mpc_or_odm( + const struct dc *dc, + struct resource_context *res_ctx, + struct pipe_ctx *pri_pipe, + struct pipe_ctx *sec_pipe, + bool odm) +{ + int pipe_idx = sec_pipe->pipe_idx; + const struct resource_pool *pool = dc->res_pool; + + DC_LOGGER_INIT(dc->ctx->logger); + + if (odm && pri_pipe->plane_state) { + /* ODM + window MPO, where MPO window is on left half only */ + if (pri_pipe->plane_state->clip_rect.x + pri_pipe->plane_state->clip_rect.width <= + pri_pipe->stream->src.x + pri_pipe->stream->src.width/2) { + + DC_LOG_SCALER("%s - ODM + window MPO(left). pri_pipe:%d\n", + __func__, + pri_pipe->pipe_idx); + return true; + } + + /* ODM + window MPO, where MPO window is on right half only */ + if (pri_pipe->plane_state->clip_rect.x >= pri_pipe->stream->src.x + pri_pipe->stream->src.width/2) { + + DC_LOG_SCALER("%s - ODM + window MPO(right). pri_pipe:%d\n", + __func__, + pri_pipe->pipe_idx); + return true; + } + } + + *sec_pipe = *pri_pipe; + + sec_pipe->pipe_idx = pipe_idx; + sec_pipe->plane_res.mi = pool->mis[pipe_idx]; + sec_pipe->plane_res.hubp = pool->hubps[pipe_idx]; + sec_pipe->plane_res.ipp = pool->ipps[pipe_idx]; + sec_pipe->plane_res.xfm = pool->transforms[pipe_idx]; + sec_pipe->plane_res.dpp = pool->dpps[pipe_idx]; + sec_pipe->plane_res.mpcc_inst = pool->dpps[pipe_idx]->inst; + sec_pipe->stream_res.dsc = NULL; + if (odm) { + if (pri_pipe->next_odm_pipe) { + ASSERT(pri_pipe->next_odm_pipe != sec_pipe); + sec_pipe->next_odm_pipe = pri_pipe->next_odm_pipe; + sec_pipe->next_odm_pipe->prev_odm_pipe = sec_pipe; + } + if (pri_pipe->top_pipe && pri_pipe->top_pipe->next_odm_pipe) { + pri_pipe->top_pipe->next_odm_pipe->bottom_pipe = sec_pipe; + sec_pipe->top_pipe = pri_pipe->top_pipe->next_odm_pipe; + } + if (pri_pipe->bottom_pipe && pri_pipe->bottom_pipe->next_odm_pipe) { + pri_pipe->bottom_pipe->next_odm_pipe->top_pipe = sec_pipe; + sec_pipe->bottom_pipe = pri_pipe->bottom_pipe->next_odm_pipe; + } + pri_pipe->next_odm_pipe = sec_pipe; + sec_pipe->prev_odm_pipe = pri_pipe; + ASSERT(sec_pipe->top_pipe == NULL); + + if (!sec_pipe->top_pipe) + sec_pipe->stream_res.opp = pool->opps[pipe_idx]; + else + sec_pipe->stream_res.opp = sec_pipe->top_pipe->stream_res.opp; + if (sec_pipe->stream->timing.flags.DSC == 1) { + dcn20_acquire_dsc(dc, res_ctx, &sec_pipe->stream_res.dsc, pipe_idx); + ASSERT(sec_pipe->stream_res.dsc); + if (sec_pipe->stream_res.dsc == NULL) + return false; + } + } else { + if (pri_pipe->bottom_pipe) { + ASSERT(pri_pipe->bottom_pipe != sec_pipe); + sec_pipe->bottom_pipe = pri_pipe->bottom_pipe; + sec_pipe->bottom_pipe->top_pipe = sec_pipe; + } + pri_pipe->bottom_pipe = sec_pipe; + sec_pipe->top_pipe = pri_pipe; + + ASSERT(pri_pipe->plane_state); + } + + return true; +} + +bool dcn32_internal_validate_bw(struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int *pipe_cnt_out, + int *vlevel_out, + bool fast_validate) +{ + bool out = false; + bool repopulate_pipes = false; + int split[MAX_PIPES] = { 0 }; + bool merge[MAX_PIPES] = { false }; + bool newly_split[MAX_PIPES] = { false }; + int pipe_cnt, i, pipe_idx; + int vlevel = context->bw_ctx.dml.soc.num_states; + struct vba_vars_st *vba = &context->bw_ctx.dml.vba; + + dc_assert_fp_enabled(); + + ASSERT(pipes); + if (!pipes) + return false; + + // For each full update, remove all existing phantom pipes first + dc->res_pool->funcs->remove_phantom_pipes(dc, context); + + dc->res_pool->funcs->update_soc_for_wm_a(dc, context); + + pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, fast_validate); + + if (!pipe_cnt) { + out = true; + goto validate_out; + } + + dml_log_pipe_params(&context->bw_ctx.dml, pipes, pipe_cnt); + + if (!fast_validate) + dcn32_full_validate_bw_helper(dc, context, pipes, &vlevel, split, merge, &pipe_cnt); + + if (fast_validate || + (dc->debug.dml_disallow_alternate_prefetch_modes && + (vlevel == context->bw_ctx.dml.soc.num_states || + vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported))) { + /* + * If dml_disallow_alternate_prefetch_modes is false, then we have already + * tried alternate prefetch modes during full validation. + * + * If mode is unsupported or there is no p-state support, then + * fall back to favouring voltage. + * + * If Prefetch mode 0 failed for this config, or passed with Max UCLK, then try + * to support with Prefetch mode 1 (dm_prefetch_support_fclk_and_stutter == 2) + */ + context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final = + dm_prefetch_support_fclk_and_stutter; + + vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt); + + /* Last attempt with Prefetch mode 2 (dm_prefetch_support_stutter == 3) */ + if (vlevel == context->bw_ctx.dml.soc.num_states) { + context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final = + dm_prefetch_support_stutter; + vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt); + } + + if (vlevel < context->bw_ctx.dml.soc.num_states) { + memset(split, 0, sizeof(split)); + memset(merge, 0, sizeof(merge)); + vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, vlevel, split, merge); + // dcn20_validate_apply_pipe_split_flags can modify voltage level outside of DML + vba->VoltageLevel = vlevel; + } + } + + dml_log_mode_support_params(&context->bw_ctx.dml); + + if (vlevel == context->bw_ctx.dml.soc.num_states) + goto validate_fail; + + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + struct pipe_ctx *mpo_pipe = pipe->bottom_pipe; + + if (!pipe->stream) + continue; + + if (vba->ODMCombineEnabled[vba->pipe_plane[pipe_idx]] != dm_odm_combine_mode_disabled + && !dc->config.enable_windowed_mpo_odm + && pipe->plane_state && mpo_pipe + && memcmp(&mpo_pipe->plane_res.scl_data.recout, + &pipe->plane_res.scl_data.recout, + sizeof(struct rect)) != 0) { + ASSERT(mpo_pipe->plane_state != pipe->plane_state); + goto validate_fail; + } + pipe_idx++; + } + + /* merge pipes if necessary */ + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + /*skip pipes that don't need merging*/ + if (!merge[i]) + continue; + + /* if ODM merge we ignore mpc tree, mpo pipes will have their own flags */ + if (pipe->prev_odm_pipe) { + /*split off odm pipe*/ + pipe->prev_odm_pipe->next_odm_pipe = pipe->next_odm_pipe; + if (pipe->next_odm_pipe) + pipe->next_odm_pipe->prev_odm_pipe = pipe->prev_odm_pipe; + + /*2:1ODM+MPC Split MPO to Single Pipe + MPC Split MPO*/ + if (pipe->bottom_pipe) { + if (pipe->bottom_pipe->prev_odm_pipe || pipe->bottom_pipe->next_odm_pipe) { + /*MPC split rules will handle this case*/ + pipe->bottom_pipe->top_pipe = NULL; + } else { + /* when merging an ODM pipes, the bottom MPC pipe must now point to + * the previous ODM pipe and its associated stream assets + */ + if (pipe->prev_odm_pipe->bottom_pipe) { + /* 3 plane MPO*/ + pipe->bottom_pipe->top_pipe = pipe->prev_odm_pipe->bottom_pipe; + pipe->prev_odm_pipe->bottom_pipe->bottom_pipe = pipe->bottom_pipe; + } else { + /* 2 plane MPO*/ + pipe->bottom_pipe->top_pipe = pipe->prev_odm_pipe; + pipe->prev_odm_pipe->bottom_pipe = pipe->bottom_pipe; + } + + memcpy(&pipe->bottom_pipe->stream_res, &pipe->bottom_pipe->top_pipe->stream_res, sizeof(struct stream_resource)); + } + } + + if (pipe->top_pipe) { + pipe->top_pipe->bottom_pipe = NULL; + } + + pipe->bottom_pipe = NULL; + pipe->next_odm_pipe = NULL; + pipe->plane_state = NULL; + pipe->stream = NULL; + pipe->top_pipe = NULL; + pipe->prev_odm_pipe = NULL; + if (pipe->stream_res.dsc) + dcn20_release_dsc(&context->res_ctx, dc->res_pool, &pipe->stream_res.dsc); + memset(&pipe->plane_res, 0, sizeof(pipe->plane_res)); + memset(&pipe->stream_res, 0, sizeof(pipe->stream_res)); + repopulate_pipes = true; + } else if (pipe->top_pipe && pipe->top_pipe->plane_state == pipe->plane_state) { + struct pipe_ctx *top_pipe = pipe->top_pipe; + struct pipe_ctx *bottom_pipe = pipe->bottom_pipe; + + top_pipe->bottom_pipe = bottom_pipe; + if (bottom_pipe) + bottom_pipe->top_pipe = top_pipe; + + pipe->top_pipe = NULL; + pipe->bottom_pipe = NULL; + pipe->plane_state = NULL; + pipe->stream = NULL; + memset(&pipe->plane_res, 0, sizeof(pipe->plane_res)); + memset(&pipe->stream_res, 0, sizeof(pipe->stream_res)); + repopulate_pipes = true; + } else + ASSERT(0); /* Should never try to merge master pipe */ + + } + + for (i = 0, pipe_idx = -1; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + struct pipe_ctx *hsplit_pipe = NULL; + bool odm; + int old_index = -1; + + if (!pipe->stream || newly_split[i]) + continue; + + pipe_idx++; + odm = vba->ODMCombineEnabled[vba->pipe_plane[pipe_idx]] != dm_odm_combine_mode_disabled; + + if (!pipe->plane_state && !odm) + continue; + + if (split[i]) { + if (odm) { + if (split[i] == 4 && old_pipe->next_odm_pipe && old_pipe->next_odm_pipe->next_odm_pipe) + old_index = old_pipe->next_odm_pipe->next_odm_pipe->pipe_idx; + else if (old_pipe->next_odm_pipe) + old_index = old_pipe->next_odm_pipe->pipe_idx; + } else { + if (split[i] == 4 && old_pipe->bottom_pipe && old_pipe->bottom_pipe->bottom_pipe && + old_pipe->bottom_pipe->bottom_pipe->plane_state == old_pipe->plane_state) + old_index = old_pipe->bottom_pipe->bottom_pipe->pipe_idx; + else if (old_pipe->bottom_pipe && + old_pipe->bottom_pipe->plane_state == old_pipe->plane_state) + old_index = old_pipe->bottom_pipe->pipe_idx; + } + hsplit_pipe = dcn32_find_split_pipe(dc, context, old_index); + ASSERT(hsplit_pipe); + if (!hsplit_pipe) + goto validate_fail; + + if (!dcn32_split_stream_for_mpc_or_odm( + dc, &context->res_ctx, + pipe, hsplit_pipe, odm)) + goto validate_fail; + + newly_split[hsplit_pipe->pipe_idx] = true; + repopulate_pipes = true; + } + if (split[i] == 4) { + struct pipe_ctx *pipe_4to1; + + if (odm && old_pipe->next_odm_pipe) + old_index = old_pipe->next_odm_pipe->pipe_idx; + else if (!odm && old_pipe->bottom_pipe && + old_pipe->bottom_pipe->plane_state == old_pipe->plane_state) + old_index = old_pipe->bottom_pipe->pipe_idx; + else + old_index = -1; + pipe_4to1 = dcn32_find_split_pipe(dc, context, old_index); + ASSERT(pipe_4to1); + if (!pipe_4to1) + goto validate_fail; + if (!dcn32_split_stream_for_mpc_or_odm( + dc, &context->res_ctx, + pipe, pipe_4to1, odm)) + goto validate_fail; + newly_split[pipe_4to1->pipe_idx] = true; + + if (odm && old_pipe->next_odm_pipe && old_pipe->next_odm_pipe->next_odm_pipe + && old_pipe->next_odm_pipe->next_odm_pipe->next_odm_pipe) + old_index = old_pipe->next_odm_pipe->next_odm_pipe->next_odm_pipe->pipe_idx; + else if (!odm && old_pipe->bottom_pipe && old_pipe->bottom_pipe->bottom_pipe && + old_pipe->bottom_pipe->bottom_pipe->bottom_pipe && + old_pipe->bottom_pipe->bottom_pipe->bottom_pipe->plane_state == old_pipe->plane_state) + old_index = old_pipe->bottom_pipe->bottom_pipe->bottom_pipe->pipe_idx; + else + old_index = -1; + pipe_4to1 = dcn32_find_split_pipe(dc, context, old_index); + ASSERT(pipe_4to1); + if (!pipe_4to1) + goto validate_fail; + if (!dcn32_split_stream_for_mpc_or_odm( + dc, &context->res_ctx, + hsplit_pipe, pipe_4to1, odm)) + goto validate_fail; + newly_split[pipe_4to1->pipe_idx] = true; + } + if (odm) + dcn20_build_mapped_resource(dc, context, pipe->stream); + } + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + if (pipe->plane_state) { + if (!resource_build_scaling_params(pipe)) + goto validate_fail; + } + } + + /* Actual dsc count per stream dsc validation*/ + if (!dcn20_validate_dsc(dc, context)) { + vba->ValidationStatus[vba->soc.num_states] = DML_FAIL_DSC_VALIDATION_FAILURE; + goto validate_fail; + } + + if (repopulate_pipes) { + pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, fast_validate); + + /* repopulate_pipes = 1 means the pipes were either split or merged. In this case + * we have to re-calculate the DET allocation and run through DML once more to + * ensure all the params are calculated correctly. We do not need to run the + * pipe split check again after this call (pipes are already split / merged). + * */ + if (!fast_validate) { + context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final = + dm_prefetch_support_uclk_fclk_and_stutter_if_possible; + vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt); + } + } + *vlevel_out = vlevel; + *pipe_cnt_out = pipe_cnt; + + out = true; + goto validate_out; + +validate_fail: + out = false; + +validate_out: + return out; +} + + +void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt, + int vlevel) +{ + int i, pipe_idx, vlevel_temp = 0; + double dcfclk = dcn3_2_soc.clock_limits[0].dcfclk_mhz; + double dcfclk_from_validation = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb]; + double dram_speed_from_validation = context->bw_ctx.dml.vba.DRAMSpeed; + double dcfclk_from_fw_based_mclk_switching = dcfclk_from_validation; + bool pstate_en = context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] != + dm_dram_clock_change_unsupported; + unsigned int dummy_latency_index = 0; + int maxMpcComb = context->bw_ctx.dml.vba.maxMpcComb; + unsigned int min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed; + unsigned int min_dram_speed_mts_margin; + + dc_assert_fp_enabled(); + + // Override DRAMClockChangeSupport for SubVP + DRR case where the DRR cannot switch without stretching it's VBLANK + if (!pstate_en && dcn32_subvp_in_use(dc, context)) { + context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] = dm_dram_clock_change_vblank_w_mall_sub_vp; + pstate_en = true; + } + + context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching = false; + + if (!pstate_en) { + /* only when the mclk switch can not be natural, is the fw based vblank stretch attempted */ + context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching = + dcn30_can_support_mclk_switch_using_fw_based_vblank_stretch(dc, context); + + if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching) { + dummy_latency_index = dcn32_find_dummy_latency_index_for_fw_based_mclk_switch(dc, + context, pipes, pipe_cnt, vlevel); + + /* After calling dcn30_find_dummy_latency_index_for_fw_based_mclk_switch + * we reinstate the original dram_clock_change_latency_us on the context + * and all variables that may have changed up to this point, except the + * newly found dummy_latency_index + */ + context->bw_ctx.dml.soc.dram_clock_change_latency_us = + dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us; + /* For DCN32/321 need to validate with fclk pstate change latency equal to dummy so + * prefetch is scheduled correctly to account for dummy pstate. + */ + if (dummy_latency_index == 0) + context->bw_ctx.dml.soc.fclk_change_latency_us = + dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dummy_pstate_latency_us; + dcn32_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, false); + maxMpcComb = context->bw_ctx.dml.vba.maxMpcComb; + dcfclk_from_fw_based_mclk_switching = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb]; + pstate_en = context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][maxMpcComb] != + dm_dram_clock_change_unsupported; + } + } + + /* Set B: + * For Set B calculations use clocks from clock_limits[2] when available i.e. when SMU is present, + * otherwise use arbitrary low value from spreadsheet for DCFCLK as lower is safer for watermark + * calculations to cover bootup clocks. + * DCFCLK: soc.clock_limits[2] when available + * UCLK: soc.clock_limits[2] when available + */ + if (dcn3_2_soc.num_states > 2) { + vlevel_temp = 2; + dcfclk = dcn3_2_soc.clock_limits[2].dcfclk_mhz; + } else + dcfclk = 615; //DCFCLK Vmin_lv + + pipes[0].clks_cfg.voltage = vlevel_temp; + pipes[0].clks_cfg.dcfclk_mhz = dcfclk; + pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel_temp].socclk_mhz; + + if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].valid) { + context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us; + context->bw_ctx.dml.soc.fclk_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.fclk_change_latency_us; + context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us; + context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us; + } + context->bw_ctx.bw.dcn.watermarks.b.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.fclk_pstate_change_ns = get_fclk_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.usr_retraining_ns = get_usr_retraining_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + + /* Set D: + * All clocks min. + * DCFCLK: Min, as reported by PM FW when available + * UCLK : Min, as reported by PM FW when available + * sr_enter_exit/sr_exit should be lower than used for DRAM (TBD after bringup or later, use as decided in Clk Mgr) + */ + + if (dcn3_2_soc.num_states > 2) { + vlevel_temp = 0; + dcfclk = dc->clk_mgr->bw_params->clk_table.entries[0].dcfclk_mhz; + } else + dcfclk = 615; //DCFCLK Vmin_lv + + pipes[0].clks_cfg.voltage = vlevel_temp; + pipes[0].clks_cfg.dcfclk_mhz = dcfclk; + pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel_temp].socclk_mhz; + + if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].valid) { + context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us; + context->bw_ctx.dml.soc.fclk_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.fclk_change_latency_us; + context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us; + context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us; + } + context->bw_ctx.bw.dcn.watermarks.d.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.fclk_pstate_change_ns = get_fclk_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.usr_retraining_ns = get_usr_retraining_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + + /* Set C, for Dummy P-State: + * All clocks min. + * DCFCLK: Min, as reported by PM FW, when available + * UCLK : Min, as reported by PM FW, when available + * pstate latency as per UCLK state dummy pstate latency + */ + + // For Set A and Set C use values from validation + pipes[0].clks_cfg.voltage = vlevel; + pipes[0].clks_cfg.dcfclk_mhz = dcfclk_from_validation; + pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].socclk_mhz; + + if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching) { + pipes[0].clks_cfg.dcfclk_mhz = dcfclk_from_fw_based_mclk_switching; + } + + if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid) { + min_dram_speed_mts = dram_speed_from_validation; + min_dram_speed_mts_margin = 160; + + context->bw_ctx.dml.soc.dram_clock_change_latency_us = + dc->clk_mgr->bw_params->dummy_pstate_table[0].dummy_pstate_latency_us; + + if (context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][maxMpcComb] == + dm_dram_clock_change_unsupported) { + int min_dram_speed_mts_offset = dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_memclk_levels - 1; + + min_dram_speed_mts = + dc->clk_mgr->bw_params->clk_table.entries[min_dram_speed_mts_offset].memclk_mhz * 16; + } + + if (!context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching) { + /* find largest table entry that is lower than dram speed, + * but lower than DPM0 still uses DPM0 + */ + for (dummy_latency_index = 3; dummy_latency_index > 0; dummy_latency_index--) + if (min_dram_speed_mts + min_dram_speed_mts_margin > + dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dram_speed_mts) + break; + } + + context->bw_ctx.dml.soc.dram_clock_change_latency_us = + dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dummy_pstate_latency_us; + + context->bw_ctx.dml.soc.fclk_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.fclk_change_latency_us; + context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us; + context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us; + } + + context->bw_ctx.bw.dcn.watermarks.c.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + /* On DCN32/321, PMFW will set PSTATE_CHANGE_TYPE = 1 (FCLK) for UCLK dummy p-state. + * In this case we must program FCLK WM Set C to use the UCLK dummy p-state WM + * value. + */ + context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.fclk_pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.usr_retraining_ns = get_usr_retraining_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + + if ((!pstate_en) && (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid)) { + /* The only difference between A and C is p-state latency, if p-state is not supported + * with full p-state latency we want to calculate DLG based on dummy p-state latency, + * Set A p-state watermark set to 0 on DCN30, when p-state unsupported, for now keep as DCN30. + */ + context->bw_ctx.bw.dcn.watermarks.a = context->bw_ctx.bw.dcn.watermarks.c; + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = 0; + /* Calculate FCLK p-state change watermark based on FCLK pstate change latency in case + * UCLK p-state is not supported, to avoid underflow in case FCLK pstate is supported + */ + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.fclk_pstate_change_ns = get_fclk_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + } else { + /* Set A: + * All clocks min. + * DCFCLK: Min, as reported by PM FW, when available + * UCLK: Min, as reported by PM FW, when available + */ + dc->res_pool->funcs->update_soc_for_wm_a(dc, context); + context->bw_ctx.bw.dcn.watermarks.a.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.fclk_pstate_change_ns = get_fclk_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.usr_retraining_ns = get_usr_retraining_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + } + + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + if (!context->res_ctx.pipe_ctx[i].stream) + continue; + + pipes[pipe_idx].clks_cfg.dispclk_mhz = get_dispclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt); + pipes[pipe_idx].clks_cfg.dppclk_mhz = get_dppclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); + + if (dc->config.forced_clocks) { + pipes[pipe_idx].clks_cfg.dispclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dispclk_mhz; + pipes[pipe_idx].clks_cfg.dppclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dppclk_mhz; + } + if (dc->debug.min_disp_clk_khz > pipes[pipe_idx].clks_cfg.dispclk_mhz * 1000) + pipes[pipe_idx].clks_cfg.dispclk_mhz = dc->debug.min_disp_clk_khz / 1000.0; + if (dc->debug.min_dpp_clk_khz > pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000) + pipes[pipe_idx].clks_cfg.dppclk_mhz = dc->debug.min_dpp_clk_khz / 1000.0; + + pipe_idx++; + } + + context->perf_params.stutter_period_us = context->bw_ctx.dml.vba.StutterPeriod; + + if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching && dummy_latency_index == 0) + context->bw_ctx.dml.soc.fclk_change_latency_us = + dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dummy_pstate_latency_us; + + dcn32_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel); + + if (!pstate_en) + /* Restore full p-state latency */ + context->bw_ctx.dml.soc.dram_clock_change_latency_us = + dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us; + + if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching) { + dcn30_setup_mclk_switch_using_fw_based_vblank_stretch(dc, context); + if (dummy_latency_index == 0) + context->bw_ctx.dml.soc.fclk_change_latency_us = + dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.fclk_change_latency_us; + } +} + +static void dcn32_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts, + unsigned int *optimal_dcfclk, + unsigned int *optimal_fclk) +{ + double bw_from_dram, bw_from_dram1, bw_from_dram2; + + bw_from_dram1 = uclk_mts * dcn3_2_soc.num_chans * + dcn3_2_soc.dram_channel_width_bytes * (dcn3_2_soc.max_avg_dram_bw_use_normal_percent / 100); + bw_from_dram2 = uclk_mts * dcn3_2_soc.num_chans * + dcn3_2_soc.dram_channel_width_bytes * (dcn3_2_soc.max_avg_sdp_bw_use_normal_percent / 100); + + bw_from_dram = (bw_from_dram1 < bw_from_dram2) ? bw_from_dram1 : bw_from_dram2; + + if (optimal_fclk) + *optimal_fclk = bw_from_dram / + (dcn3_2_soc.fabric_datapath_to_dcn_data_return_bytes * (dcn3_2_soc.max_avg_sdp_bw_use_normal_percent / 100)); + + if (optimal_dcfclk) + *optimal_dcfclk = bw_from_dram / + (dcn3_2_soc.return_bus_width_bytes * (dcn3_2_soc.max_avg_sdp_bw_use_normal_percent / 100)); +} + +static void remove_entry_from_table_at_index(struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries, + unsigned int index) +{ + int i; + + if (*num_entries == 0) + return; + + for (i = index; i < *num_entries - 1; i++) { + table[i] = table[i + 1]; + } + memset(&table[--(*num_entries)], 0, sizeof(struct _vcs_dpi_voltage_scaling_st)); +} + +void dcn32_patch_dpm_table(struct clk_bw_params *bw_params) +{ + int i; + unsigned int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, + max_phyclk_mhz = 0, max_dtbclk_mhz = 0, max_fclk_mhz = 0, max_uclk_mhz = 0; + + for (i = 0; i < MAX_NUM_DPM_LVL; i++) { + if (bw_params->clk_table.entries[i].dcfclk_mhz > max_dcfclk_mhz) + max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz; + if (bw_params->clk_table.entries[i].fclk_mhz > max_fclk_mhz) + max_fclk_mhz = bw_params->clk_table.entries[i].fclk_mhz; + if (bw_params->clk_table.entries[i].memclk_mhz > max_uclk_mhz) + max_uclk_mhz = bw_params->clk_table.entries[i].memclk_mhz; + if (bw_params->clk_table.entries[i].dispclk_mhz > max_dispclk_mhz) + max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz; + if (bw_params->clk_table.entries[i].dppclk_mhz > max_dppclk_mhz) + max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz; + if (bw_params->clk_table.entries[i].phyclk_mhz > max_phyclk_mhz) + max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz; + if (bw_params->clk_table.entries[i].dtbclk_mhz > max_dtbclk_mhz) + max_dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz; + } + + /* Scan through clock values we currently have and if they are 0, + * then populate it with dcn3_2_soc.clock_limits[] value. + * + * Do it for DCFCLK, DISPCLK, DTBCLK and UCLK as any of those being + * 0, will cause it to skip building the clock table. + */ + if (max_dcfclk_mhz == 0) + bw_params->clk_table.entries[0].dcfclk_mhz = dcn3_2_soc.clock_limits[0].dcfclk_mhz; + if (max_dispclk_mhz == 0) + bw_params->clk_table.entries[0].dispclk_mhz = dcn3_2_soc.clock_limits[0].dispclk_mhz; + if (max_dtbclk_mhz == 0) + bw_params->clk_table.entries[0].dtbclk_mhz = dcn3_2_soc.clock_limits[0].dtbclk_mhz; + if (max_uclk_mhz == 0) + bw_params->clk_table.entries[0].memclk_mhz = dcn3_2_soc.clock_limits[0].dram_speed_mts / 16; +} + +static int build_synthetic_soc_states(struct clk_bw_params *bw_params, + struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries) +{ + int i, j; + struct _vcs_dpi_voltage_scaling_st entry = {0}; + + unsigned int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, + max_phyclk_mhz = 0, max_dtbclk_mhz = 0, max_fclk_mhz = 0, max_uclk_mhz = 0; + + unsigned int min_dcfclk_mhz = 199, min_fclk_mhz = 299; + + static const unsigned int num_dcfclk_stas = 5; + unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {199, 615, 906, 1324, 1564}; + + unsigned int num_uclk_dpms = 0; + unsigned int num_fclk_dpms = 0; + unsigned int num_dcfclk_dpms = 0; + + for (i = 0; i < MAX_NUM_DPM_LVL; i++) { + if (bw_params->clk_table.entries[i].dcfclk_mhz > max_dcfclk_mhz) + max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz; + if (bw_params->clk_table.entries[i].fclk_mhz > max_fclk_mhz) + max_fclk_mhz = bw_params->clk_table.entries[i].fclk_mhz; + if (bw_params->clk_table.entries[i].memclk_mhz > max_uclk_mhz) + max_uclk_mhz = bw_params->clk_table.entries[i].memclk_mhz; + if (bw_params->clk_table.entries[i].dispclk_mhz > max_dispclk_mhz) + max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz; + if (bw_params->clk_table.entries[i].dppclk_mhz > max_dppclk_mhz) + max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz; + if (bw_params->clk_table.entries[i].phyclk_mhz > max_phyclk_mhz) + max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz; + if (bw_params->clk_table.entries[i].dtbclk_mhz > max_dtbclk_mhz) + max_dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz; + + if (bw_params->clk_table.entries[i].memclk_mhz > 0) + num_uclk_dpms++; + if (bw_params->clk_table.entries[i].fclk_mhz > 0) + num_fclk_dpms++; + if (bw_params->clk_table.entries[i].dcfclk_mhz > 0) + num_dcfclk_dpms++; + } + + if (!max_dcfclk_mhz || !max_dispclk_mhz || !max_dtbclk_mhz) + return -1; + + if (max_dppclk_mhz == 0) + max_dppclk_mhz = max_dispclk_mhz; + + if (max_fclk_mhz == 0) + max_fclk_mhz = max_dcfclk_mhz * dcn3_2_soc.pct_ideal_sdp_bw_after_urgent / dcn3_2_soc.pct_ideal_fabric_bw_after_urgent; + + if (max_phyclk_mhz == 0) + max_phyclk_mhz = dcn3_2_soc.clock_limits[0].phyclk_mhz; + + *num_entries = 0; + entry.dispclk_mhz = max_dispclk_mhz; + entry.dscclk_mhz = max_dispclk_mhz / 3; + entry.dppclk_mhz = max_dppclk_mhz; + entry.dtbclk_mhz = max_dtbclk_mhz; + entry.phyclk_mhz = max_phyclk_mhz; + entry.phyclk_d18_mhz = dcn3_2_soc.clock_limits[0].phyclk_d18_mhz; + entry.phyclk_d32_mhz = dcn3_2_soc.clock_limits[0].phyclk_d32_mhz; + + // Insert all the DCFCLK STAs + for (i = 0; i < num_dcfclk_stas; i++) { + entry.dcfclk_mhz = dcfclk_sta_targets[i]; + entry.fabricclk_mhz = 0; + entry.dram_speed_mts = 0; + + insert_entry_into_table_sorted(table, num_entries, &entry); + } + + // Insert the max DCFCLK + entry.dcfclk_mhz = max_dcfclk_mhz; + entry.fabricclk_mhz = 0; + entry.dram_speed_mts = 0; + + insert_entry_into_table_sorted(table, num_entries, &entry); + + // Insert the UCLK DPMS + for (i = 0; i < num_uclk_dpms; i++) { + entry.dcfclk_mhz = 0; + entry.fabricclk_mhz = 0; + entry.dram_speed_mts = bw_params->clk_table.entries[i].memclk_mhz * 16; + + insert_entry_into_table_sorted(table, num_entries, &entry); + } + + // If FCLK is coarse grained, insert individual DPMs. + if (num_fclk_dpms > 2) { + for (i = 0; i < num_fclk_dpms; i++) { + entry.dcfclk_mhz = 0; + entry.fabricclk_mhz = bw_params->clk_table.entries[i].fclk_mhz; + entry.dram_speed_mts = 0; + + insert_entry_into_table_sorted(table, num_entries, &entry); + } + } + // If FCLK fine grained, only insert max + else { + entry.dcfclk_mhz = 0; + entry.fabricclk_mhz = max_fclk_mhz; + entry.dram_speed_mts = 0; + + insert_entry_into_table_sorted(table, num_entries, &entry); + } + + // At this point, the table contains all "points of interest" based on + // DPMs from PMFW, and STAs. Table is sorted by BW, and all clock + // ratios (by derate, are exact). + + // Remove states that require higher clocks than are supported + for (i = *num_entries - 1; i >= 0 ; i--) { + if (table[i].dcfclk_mhz > max_dcfclk_mhz || + table[i].fabricclk_mhz > max_fclk_mhz || + table[i].dram_speed_mts > max_uclk_mhz * 16) + remove_entry_from_table_at_index(table, num_entries, i); + } + + // At this point, the table only contains supported points of interest + // it could be used as is, but some states may be redundant due to + // coarse grained nature of some clocks, so we want to round up to + // coarse grained DPMs and remove duplicates. + + // Round up UCLKs + for (i = *num_entries - 1; i >= 0 ; i--) { + for (j = 0; j < num_uclk_dpms; j++) { + if (bw_params->clk_table.entries[j].memclk_mhz * 16 >= table[i].dram_speed_mts) { + table[i].dram_speed_mts = bw_params->clk_table.entries[j].memclk_mhz * 16; + break; + } + } + } + + // If FCLK is coarse grained, round up to next DPMs + if (num_fclk_dpms > 2) { + for (i = *num_entries - 1; i >= 0 ; i--) { + for (j = 0; j < num_fclk_dpms; j++) { + if (bw_params->clk_table.entries[j].fclk_mhz >= table[i].fabricclk_mhz) { + table[i].fabricclk_mhz = bw_params->clk_table.entries[j].fclk_mhz; + break; + } + } + } + } + // Otherwise, round up to minimum. + else { + for (i = *num_entries - 1; i >= 0 ; i--) { + if (table[i].fabricclk_mhz < min_fclk_mhz) { + table[i].fabricclk_mhz = min_fclk_mhz; + break; + } + } + } + + // Round DCFCLKs up to minimum + for (i = *num_entries - 1; i >= 0 ; i--) { + if (table[i].dcfclk_mhz < min_dcfclk_mhz) { + table[i].dcfclk_mhz = min_dcfclk_mhz; + break; + } + } + + // Remove duplicate states, note duplicate states are always neighbouring since table is sorted. + i = 0; + while (i < *num_entries - 1) { + if (table[i].dcfclk_mhz == table[i + 1].dcfclk_mhz && + table[i].fabricclk_mhz == table[i + 1].fabricclk_mhz && + table[i].dram_speed_mts == table[i + 1].dram_speed_mts) + remove_entry_from_table_at_index(table, num_entries, i + 1); + else + i++; + } + + // Fix up the state indicies + for (i = *num_entries - 1; i >= 0 ; i--) { + table[i].state = i; + } + + return 0; +} + +/* + * dcn32_update_bw_bounding_box + * + * This would override some dcn3_2 ip_or_soc initial parameters hardcoded from + * spreadsheet with actual values as per dGPU SKU: + * - with passed few options from dc->config + * - with dentist_vco_frequency from Clk Mgr (currently hardcoded, but might + * need to get it from PM FW) + * - with passed latency values (passed in ns units) in dc-> bb override for + * debugging purposes + * - with passed latencies from VBIOS (in 100_ns units) if available for + * certain dGPU SKU + * - with number of DRAM channels from VBIOS (which differ for certain dGPU SKU + * of the same ASIC) + * - clocks levels with passed clk_table entries from Clk Mgr as reported by PM + * FW for different clocks (which might differ for certain dGPU SKU of the + * same ASIC) + */ +void dcn32_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params) +{ + dc_assert_fp_enabled(); + + if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) { + /* Overrides from dc->config options */ + dcn3_2_ip.clamp_min_dcfclk = dc->config.clamp_min_dcfclk; + + /* Override from passed dc->bb_overrides if available*/ + if ((int)(dcn3_2_soc.sr_exit_time_us * 1000) != dc->bb_overrides.sr_exit_time_ns + && dc->bb_overrides.sr_exit_time_ns) { + dcn3_2_soc.sr_exit_time_us = dc->bb_overrides.sr_exit_time_ns / 1000.0; + } + + if ((int)(dcn3_2_soc.sr_enter_plus_exit_time_us * 1000) + != dc->bb_overrides.sr_enter_plus_exit_time_ns + && dc->bb_overrides.sr_enter_plus_exit_time_ns) { + dcn3_2_soc.sr_enter_plus_exit_time_us = + dc->bb_overrides.sr_enter_plus_exit_time_ns / 1000.0; + } + + if ((int)(dcn3_2_soc.urgent_latency_us * 1000) != dc->bb_overrides.urgent_latency_ns + && dc->bb_overrides.urgent_latency_ns) { + dcn3_2_soc.urgent_latency_us = dc->bb_overrides.urgent_latency_ns / 1000.0; + dcn3_2_soc.urgent_latency_pixel_data_only_us = dc->bb_overrides.urgent_latency_ns / 1000.0; + } + + if ((int)(dcn3_2_soc.dram_clock_change_latency_us * 1000) + != dc->bb_overrides.dram_clock_change_latency_ns + && dc->bb_overrides.dram_clock_change_latency_ns) { + dcn3_2_soc.dram_clock_change_latency_us = + dc->bb_overrides.dram_clock_change_latency_ns / 1000.0; + } + + if ((int)(dcn3_2_soc.fclk_change_latency_us * 1000) + != dc->bb_overrides.fclk_clock_change_latency_ns + && dc->bb_overrides.fclk_clock_change_latency_ns) { + dcn3_2_soc.fclk_change_latency_us = + dc->bb_overrides.fclk_clock_change_latency_ns / 1000; + } + + if ((int)(dcn3_2_soc.dummy_pstate_latency_us * 1000) + != dc->bb_overrides.dummy_clock_change_latency_ns + && dc->bb_overrides.dummy_clock_change_latency_ns) { + dcn3_2_soc.dummy_pstate_latency_us = + dc->bb_overrides.dummy_clock_change_latency_ns / 1000.0; + } + + /* Override from VBIOS if VBIOS bb_info available */ + if (dc->ctx->dc_bios->funcs->get_soc_bb_info) { + struct bp_soc_bb_info bb_info = {0}; + + if (dc->ctx->dc_bios->funcs->get_soc_bb_info(dc->ctx->dc_bios, &bb_info) == BP_RESULT_OK) { + if (bb_info.dram_clock_change_latency_100ns > 0) + dcn3_2_soc.dram_clock_change_latency_us = + bb_info.dram_clock_change_latency_100ns * 10; + + if (bb_info.dram_sr_enter_exit_latency_100ns > 0) + dcn3_2_soc.sr_enter_plus_exit_time_us = + bb_info.dram_sr_enter_exit_latency_100ns * 10; + + if (bb_info.dram_sr_exit_latency_100ns > 0) + dcn3_2_soc.sr_exit_time_us = + bb_info.dram_sr_exit_latency_100ns * 10; + } + } + + /* Override from VBIOS for num_chan */ + if (dc->ctx->dc_bios->vram_info.num_chans) { + dcn3_2_soc.num_chans = dc->ctx->dc_bios->vram_info.num_chans; + dcn3_2_soc.mall_allocated_for_dcn_mbytes = (double)(dcn32_calc_num_avail_chans_for_mall(dc, + dc->ctx->dc_bios->vram_info.num_chans) * dc->caps.mall_size_per_mem_channel); + } + + if (dc->ctx->dc_bios->vram_info.dram_channel_width_bytes) + dcn3_2_soc.dram_channel_width_bytes = dc->ctx->dc_bios->vram_info.dram_channel_width_bytes; + } + + /* DML DSC delay factor workaround */ + dcn3_2_ip.dsc_delay_factor_wa = dc->debug.dsc_delay_factor_wa_x1000 / 1000.0; + + dcn3_2_ip.min_prefetch_in_strobe_us = dc->debug.min_prefetch_in_strobe_ns / 1000.0; + + /* Override dispclk_dppclk_vco_speed_mhz from Clk Mgr */ + dcn3_2_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; + dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; + + /* Overrides Clock levelsfrom CLK Mgr table entries as reported by PM FW */ + if ((!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) && (bw_params->clk_table.entries[0].memclk_mhz)) { + if (dc->debug.use_legacy_soc_bb_mechanism) { + unsigned int i = 0, j = 0, num_states = 0; + + unsigned int dcfclk_mhz[DC__VOLTAGE_STATES] = {0}; + unsigned int dram_speed_mts[DC__VOLTAGE_STATES] = {0}; + unsigned int optimal_uclk_for_dcfclk_sta_targets[DC__VOLTAGE_STATES] = {0}; + unsigned int optimal_dcfclk_for_uclk[DC__VOLTAGE_STATES] = {0}; + unsigned int min_dcfclk = UINT_MAX; + /* Set 199 as first value in STA target array to have a minimum DCFCLK value. + * For DCN32 we set min to 199 so minimum FCLK DPM0 (300Mhz can be achieved) */ + unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {199, 615, 906, 1324, 1564}; + unsigned int num_dcfclk_sta_targets = 4, num_uclk_states = 0; + unsigned int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, max_phyclk_mhz = 0; + + for (i = 0; i < MAX_NUM_DPM_LVL; i++) { + if (bw_params->clk_table.entries[i].dcfclk_mhz > max_dcfclk_mhz) + max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz; + if (bw_params->clk_table.entries[i].dcfclk_mhz != 0 && + bw_params->clk_table.entries[i].dcfclk_mhz < min_dcfclk) + min_dcfclk = bw_params->clk_table.entries[i].dcfclk_mhz; + if (bw_params->clk_table.entries[i].dispclk_mhz > max_dispclk_mhz) + max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz; + if (bw_params->clk_table.entries[i].dppclk_mhz > max_dppclk_mhz) + max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz; + if (bw_params->clk_table.entries[i].phyclk_mhz > max_phyclk_mhz) + max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz; + } + if (min_dcfclk > dcfclk_sta_targets[0]) + dcfclk_sta_targets[0] = min_dcfclk; + if (!max_dcfclk_mhz) + max_dcfclk_mhz = dcn3_2_soc.clock_limits[0].dcfclk_mhz; + if (!max_dispclk_mhz) + max_dispclk_mhz = dcn3_2_soc.clock_limits[0].dispclk_mhz; + if (!max_dppclk_mhz) + max_dppclk_mhz = dcn3_2_soc.clock_limits[0].dppclk_mhz; + if (!max_phyclk_mhz) + max_phyclk_mhz = dcn3_2_soc.clock_limits[0].phyclk_mhz; + + if (max_dcfclk_mhz > dcfclk_sta_targets[num_dcfclk_sta_targets-1]) { + // If max DCFCLK is greater than the max DCFCLK STA target, insert into the DCFCLK STA target array + dcfclk_sta_targets[num_dcfclk_sta_targets] = max_dcfclk_mhz; + num_dcfclk_sta_targets++; + } else if (max_dcfclk_mhz < dcfclk_sta_targets[num_dcfclk_sta_targets-1]) { + // If max DCFCLK is less than the max DCFCLK STA target, cap values and remove duplicates + for (i = 0; i < num_dcfclk_sta_targets; i++) { + if (dcfclk_sta_targets[i] > max_dcfclk_mhz) { + dcfclk_sta_targets[i] = max_dcfclk_mhz; + break; + } + } + // Update size of array since we "removed" duplicates + num_dcfclk_sta_targets = i + 1; + } + + num_uclk_states = bw_params->clk_table.num_entries; + + // Calculate optimal dcfclk for each uclk + for (i = 0; i < num_uclk_states; i++) { + dcn32_get_optimal_dcfclk_fclk_for_uclk(bw_params->clk_table.entries[i].memclk_mhz * 16, + &optimal_dcfclk_for_uclk[i], NULL); + if (optimal_dcfclk_for_uclk[i] < bw_params->clk_table.entries[0].dcfclk_mhz) { + optimal_dcfclk_for_uclk[i] = bw_params->clk_table.entries[0].dcfclk_mhz; + } + } + + // Calculate optimal uclk for each dcfclk sta target + for (i = 0; i < num_dcfclk_sta_targets; i++) { + for (j = 0; j < num_uclk_states; j++) { + if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j]) { + optimal_uclk_for_dcfclk_sta_targets[i] = + bw_params->clk_table.entries[j].memclk_mhz * 16; + break; + } + } + } + + i = 0; + j = 0; + // create the final dcfclk and uclk table + while (i < num_dcfclk_sta_targets && j < num_uclk_states && num_states < DC__VOLTAGE_STATES) { + if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j] && i < num_dcfclk_sta_targets) { + dcfclk_mhz[num_states] = dcfclk_sta_targets[i]; + dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++]; + } else { + if (j < num_uclk_states && optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) { + dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j]; + dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16; + } else { + j = num_uclk_states; + } + } + } + + while (i < num_dcfclk_sta_targets && num_states < DC__VOLTAGE_STATES) { + dcfclk_mhz[num_states] = dcfclk_sta_targets[i]; + dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++]; + } + + while (j < num_uclk_states && num_states < DC__VOLTAGE_STATES && + optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) { + dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j]; + dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16; + } + + dcn3_2_soc.num_states = num_states; + for (i = 0; i < dcn3_2_soc.num_states; i++) { + dcn3_2_soc.clock_limits[i].state = i; + dcn3_2_soc.clock_limits[i].dcfclk_mhz = dcfclk_mhz[i]; + dcn3_2_soc.clock_limits[i].fabricclk_mhz = dcfclk_mhz[i]; + + /* Fill all states with max values of all these clocks */ + dcn3_2_soc.clock_limits[i].dispclk_mhz = max_dispclk_mhz; + dcn3_2_soc.clock_limits[i].dppclk_mhz = max_dppclk_mhz; + dcn3_2_soc.clock_limits[i].phyclk_mhz = max_phyclk_mhz; + dcn3_2_soc.clock_limits[i].dscclk_mhz = max_dispclk_mhz / 3; + + /* Populate from bw_params for DTBCLK, SOCCLK */ + if (i > 0) { + if (!bw_params->clk_table.entries[i].dtbclk_mhz) { + dcn3_2_soc.clock_limits[i].dtbclk_mhz = dcn3_2_soc.clock_limits[i-1].dtbclk_mhz; + } else { + dcn3_2_soc.clock_limits[i].dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz; + } + } else if (bw_params->clk_table.entries[i].dtbclk_mhz) { + dcn3_2_soc.clock_limits[i].dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz; + } + + if (!bw_params->clk_table.entries[i].socclk_mhz && i > 0) + dcn3_2_soc.clock_limits[i].socclk_mhz = dcn3_2_soc.clock_limits[i-1].socclk_mhz; + else + dcn3_2_soc.clock_limits[i].socclk_mhz = bw_params->clk_table.entries[i].socclk_mhz; + + if (!dram_speed_mts[i] && i > 0) + dcn3_2_soc.clock_limits[i].dram_speed_mts = dcn3_2_soc.clock_limits[i-1].dram_speed_mts; + else + dcn3_2_soc.clock_limits[i].dram_speed_mts = dram_speed_mts[i]; + + /* These clocks cannot come from bw_params, always fill from dcn3_2_soc[0] */ + /* PHYCLK_D18, PHYCLK_D32 */ + dcn3_2_soc.clock_limits[i].phyclk_d18_mhz = dcn3_2_soc.clock_limits[0].phyclk_d18_mhz; + dcn3_2_soc.clock_limits[i].phyclk_d32_mhz = dcn3_2_soc.clock_limits[0].phyclk_d32_mhz; + } + } else { + build_synthetic_soc_states(bw_params, dcn3_2_soc.clock_limits, &dcn3_2_soc.num_states); + } + + /* Re-init DML with updated bb */ + dml_init_instance(&dc->dml, &dcn3_2_soc, &dcn3_2_ip, DML_PROJECT_DCN32); + if (dc->current_state) + dml_init_instance(&dc->current_state->bw_ctx.dml, &dcn3_2_soc, &dcn3_2_ip, DML_PROJECT_DCN32); + } +} + +void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes, + int pipe_cnt) +{ + dc_assert_fp_enabled(); + + pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0; + pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0; +} + +bool dcn32_allow_subvp_with_active_margin(struct pipe_ctx *pipe) +{ + bool allow = false; + uint32_t refresh_rate = 0; + + /* Allow subvp on displays that have active margin for 2560x1440@60hz displays + * only for now. There must be no scaling as well. + * + * For now we only enable on 2560x1440@60hz displays to enable 4K60 + 1440p60 configs + * for p-state switching. + */ + if (pipe->stream && pipe->plane_state) { + refresh_rate = (pipe->stream->timing.pix_clk_100hz * 100 + + pipe->stream->timing.v_total * pipe->stream->timing.h_total - 1) + / (double)(pipe->stream->timing.v_total * pipe->stream->timing.h_total); + if (pipe->stream->timing.v_addressable == 1440 && + pipe->stream->timing.h_addressable == 2560 && + refresh_rate >= 55 && refresh_rate <= 65 && + pipe->plane_state->src_rect.height == 1440 && + pipe->plane_state->src_rect.width == 2560 && + pipe->plane_state->dst_rect.height == 1440 && + pipe->plane_state->dst_rect.width == 2560) + allow = true; + } + return allow; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h new file mode 100644 index 000000000..ab010e7e8 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h @@ -0,0 +1,79 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DCN32_FPU_H__ +#define __DCN32_FPU_H__ + +#include "clk_mgr_internal.h" + +void dcn32_build_wm_range_table_fpu(struct clk_mgr_internal *clk_mgr); + +void dcn32_helper_populate_phantom_dlg_params(struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt); + +uint8_t dcn32_predict_pipe_split(struct dc_state *context, + display_e2e_pipe_params_st *pipe_e2e); + +void insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table, + unsigned int *num_entries, + struct _vcs_dpi_voltage_scaling_st *entry); + +void dcn32_set_phantom_stream_timing(struct dc *dc, + struct dc_state *context, + struct pipe_ctx *ref_pipe, + struct dc_stream_state *phantom_stream, + display_e2e_pipe_params_st *pipes, + unsigned int pipe_cnt, + unsigned int dc_pipe_idx); + +bool dcn32_internal_validate_bw(struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int *pipe_cnt_out, + int *vlevel_out, + bool fast_validate); + +void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt, + int vlevel); + +void dcn32_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params); + +int dcn32_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt, + int vlevel); + +void dcn32_patch_dpm_table(struct clk_bw_params *bw_params); + +void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes, + int pipe_cnt); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c new file mode 100644 index 000000000..19f556572 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c @@ -0,0 +1,3728 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#include "dc.h" +#include "dc_link.h" +#include "../display_mode_lib.h" +#include "display_mode_vba_32.h" +#include "../dml_inline_defs.h" +#include "display_mode_vba_util_32.h" + +void dml32_recalculate(struct display_mode_lib *mode_lib); +static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation( + struct display_mode_lib *mode_lib); +void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib); + +void dml32_recalculate(struct display_mode_lib *mode_lib) +{ + ModeSupportAndSystemConfiguration(mode_lib); + + dml32_CalculateMaxDETAndMinCompressedBufferSize(mode_lib->vba.ConfigReturnBufferSizeInKByte, + mode_lib->vba.ROBBufferSizeInKByte, + DC__NUM_DPP, + false, //mode_lib->vba.override_setting.nomDETInKByteOverrideEnable, + 0, //mode_lib->vba.override_setting.nomDETInKByteOverrideValue, + + /* Output */ + &mode_lib->vba.MaxTotalDETInKByte, &mode_lib->vba.nomDETInKByte, + &mode_lib->vba.MinCompressedBufferSizeInKByte); + + PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib); +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: Calling DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation\n", __func__); +#endif + DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib); +} + +static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation( + struct display_mode_lib *mode_lib) +{ + struct vba_vars_st *v = &mode_lib->vba; + unsigned int j, k; + bool ImmediateFlipRequirementFinal; + int iteration; + double MaxTotalRDBandwidth; + unsigned int NextPrefetchMode; + double MaxTotalRDBandwidthNoUrgentBurst = 0.0; + bool DestinationLineTimesForPrefetchLessThan2 = false; + bool VRatioPrefetchMoreThanMax = false; + double TWait; + double TotalWRBandwidth = 0; + double WRBandwidth = 0; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: --- START ---\n", __func__); + dml_print("DML::%s: mode_lib->vba.PrefetchMode = %d\n", __func__, mode_lib->vba.PrefetchMode); + dml_print("DML::%s: mode_lib->vba.ImmediateFlipSupport = %d\n", __func__, mode_lib->vba.ImmediateFlipSupport); + dml_print("DML::%s: mode_lib->vba.VoltageLevel = %d\n", __func__, mode_lib->vba.VoltageLevel); +#endif + + v->WritebackDISPCLK = 0.0; + v->GlobalDPPCLK = 0.0; + + // DISPCLK and DPPCLK Calculation + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { + if (mode_lib->vba.WritebackEnable[k]) { + v->WritebackDISPCLK = dml_max(v->WritebackDISPCLK, + dml32_CalculateWriteBackDISPCLK( + mode_lib->vba.WritebackPixelFormat[k], + mode_lib->vba.PixelClock[k], mode_lib->vba.WritebackHRatio[k], + mode_lib->vba.WritebackVRatio[k], + mode_lib->vba.WritebackHTaps[k], + mode_lib->vba.WritebackVTaps[k], + mode_lib->vba.WritebackSourceWidth[k], + mode_lib->vba.WritebackDestinationWidth[k], + mode_lib->vba.HTotal[k], mode_lib->vba.WritebackLineBufferSize, + mode_lib->vba.DISPCLKDPPCLKVCOSpeed)); + } + } + + v->DISPCLK_calculated = v->WritebackDISPCLK; + + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { + if (mode_lib->vba.BlendingAndTiming[k] == k) { + v->DISPCLK_calculated = dml_max(v->DISPCLK_calculated, + dml32_CalculateRequiredDispclk( + mode_lib->vba.ODMCombineEnabled[k], + mode_lib->vba.PixelClock[k], + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading, + mode_lib->vba.DISPCLKRampingMargin, + mode_lib->vba.DISPCLKDPPCLKVCOSpeed, + mode_lib->vba.MaxDppclk[v->soc.num_states - 1])); + } + } + + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { + dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(mode_lib->vba.HRatio[k], + mode_lib->vba.HRatioChroma[k], + mode_lib->vba.VRatio[k], + mode_lib->vba.VRatioChroma[k], + mode_lib->vba.MaxDCHUBToPSCLThroughput, + mode_lib->vba.MaxPSCLToLBThroughput, + mode_lib->vba.PixelClock[k], + mode_lib->vba.SourcePixelFormat[k], + mode_lib->vba.htaps[k], + mode_lib->vba.HTAPsChroma[k], + mode_lib->vba.vtaps[k], + mode_lib->vba.VTAPsChroma[k], + + /* Output */ + &v->PSCL_THROUGHPUT_LUMA[k], &v->PSCL_THROUGHPUT_CHROMA[k], + &v->DPPCLKUsingSingleDPP[k]); + } + + dml32_CalculateDPPCLK(mode_lib->vba.NumberOfActiveSurfaces, mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading, + mode_lib->vba.DISPCLKDPPCLKVCOSpeed, v->DPPCLKUsingSingleDPP, mode_lib->vba.DPPPerPlane, + /* Output */ + &v->GlobalDPPCLK, v->DPPCLK); + + for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { + v->DPPCLK_calculated[k] = v->DPPCLK[k]; + } + + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { + dml32_CalculateBytePerPixelAndBlockSizes( + mode_lib->vba.SourcePixelFormat[k], + mode_lib->vba.SurfaceTiling[k], + + /* Output */ + &v->BytePerPixelY[k], + &v->BytePerPixelC[k], + &v->BytePerPixelDETY[k], + &v->BytePerPixelDETC[k], + &v->BlockHeight256BytesY[k], + &v->BlockHeight256BytesC[k], + &v->BlockWidth256BytesY[k], + &v->BlockWidth256BytesC[k], + &v->BlockHeightY[k], + &v->BlockHeightC[k], + &v->BlockWidthY[k], + &v->BlockWidthC[k]); + } + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: %d\n", __func__, __LINE__); +#endif + dml32_CalculateSwathWidth( + false, // ForceSingleDPP + mode_lib->vba.NumberOfActiveSurfaces, + mode_lib->vba.SourcePixelFormat, + mode_lib->vba.SourceRotation, + mode_lib->vba.ViewportStationary, + mode_lib->vba.ViewportWidth, + mode_lib->vba.ViewportHeight, + mode_lib->vba.ViewportXStartY, + mode_lib->vba.ViewportYStartY, + mode_lib->vba.ViewportXStartC, + mode_lib->vba.ViewportYStartC, + mode_lib->vba.SurfaceWidthY, + mode_lib->vba.SurfaceWidthC, + mode_lib->vba.SurfaceHeightY, + mode_lib->vba.SurfaceHeightC, + mode_lib->vba.ODMCombineEnabled, + v->BytePerPixelY, + v->BytePerPixelC, + v->BlockHeight256BytesY, + v->BlockHeight256BytesC, + v->BlockWidth256BytesY, + v->BlockWidth256BytesC, + mode_lib->vba.BlendingAndTiming, + mode_lib->vba.HActive, + mode_lib->vba.HRatio, + mode_lib->vba.DPPPerPlane, + + /* Output */ + v->SwathWidthSingleDPPY, v->SwathWidthSingleDPPC, v->SwathWidthY, v->SwathWidthC, + v->dummy_vars + .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation + .dummy_integer_array[0], // Integer MaximumSwathHeightY[] + v->dummy_vars + .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation + .dummy_integer_array[1], // Integer MaximumSwathHeightC[] + v->swath_width_luma_ub, v->swath_width_chroma_ub); + + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { + v->ReadBandwidthSurfaceLuma[k] = v->SwathWidthSingleDPPY[k] * v->BytePerPixelY[k] + / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k]; + v->ReadBandwidthSurfaceChroma[k] = v->SwathWidthSingleDPPC[k] * v->BytePerPixelC[k] + / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) + * mode_lib->vba.VRatioChroma[k]; +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: ReadBandwidthSurfaceLuma[%i] = %fBps\n", + __func__, k, v->ReadBandwidthSurfaceLuma[k]); + dml_print("DML::%s: ReadBandwidthSurfaceChroma[%i] = %fBps\n", + __func__, k, v->ReadBandwidthSurfaceChroma[k]); +#endif + } + + { + // VBA_DELTA + // Calculate DET size, swath height + dml32_CalculateSwathAndDETConfiguration( + mode_lib->vba.DETSizeOverride, + mode_lib->vba.UsesMALLForPStateChange, + mode_lib->vba.ConfigReturnBufferSizeInKByte, + mode_lib->vba.MaxTotalDETInKByte, + mode_lib->vba.MinCompressedBufferSizeInKByte, + false, /* ForceSingleDPP */ + mode_lib->vba.NumberOfActiveSurfaces, + mode_lib->vba.nomDETInKByte, + mode_lib->vba.UseUnboundedRequesting, + mode_lib->vba.DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment, + mode_lib->vba.ip.pixel_chunk_size_kbytes, + mode_lib->vba.ip.rob_buffer_size_kbytes, + mode_lib->vba.CompressedBufferSegmentSizeInkByteFinal, + v->dummy_vars + .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation + .dummy_output_encoder_array, /* output_encoder_class Output[] */ + v->ReadBandwidthSurfaceLuma, + v->ReadBandwidthSurfaceChroma, + v->dummy_vars + .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation + .dummy_single_array[0], /* Single MaximumSwathWidthLuma[] */ + v->dummy_vars + .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation + .dummy_single_array[1], /* Single MaximumSwathWidthChroma[] */ + mode_lib->vba.SourceRotation, + mode_lib->vba.ViewportStationary, + mode_lib->vba.SourcePixelFormat, + mode_lib->vba.SurfaceTiling, + mode_lib->vba.ViewportWidth, + mode_lib->vba.ViewportHeight, + mode_lib->vba.ViewportXStartY, + mode_lib->vba.ViewportYStartY, + mode_lib->vba.ViewportXStartC, + mode_lib->vba.ViewportYStartC, + mode_lib->vba.SurfaceWidthY, + mode_lib->vba.SurfaceWidthC, + mode_lib->vba.SurfaceHeightY, + mode_lib->vba.SurfaceHeightC, + v->BlockHeight256BytesY, + v->BlockHeight256BytesC, + v->BlockWidth256BytesY, + v->BlockWidth256BytesC, + mode_lib->vba.ODMCombineEnabled, + mode_lib->vba.BlendingAndTiming, + v->BytePerPixelY, + v->BytePerPixelC, + v->BytePerPixelDETY, + v->BytePerPixelDETC, + mode_lib->vba.HActive, + mode_lib->vba.HRatio, + mode_lib->vba.HRatioChroma, + mode_lib->vba.DPPPerPlane, + + /* Output */ + v->dummy_vars + .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation + .dummy_long_array[0], /* Long swath_width_luma_ub[] */ + v->dummy_vars + .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation + .dummy_long_array[1], /* Long swath_width_chroma_ub[] */ + v->dummy_vars + .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation + .dummy_double_array[0], /* Long SwathWidth[] */ + v->dummy_vars + .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation + .dummy_double_array[1], /* Long SwathWidthChroma[] */ + mode_lib->vba.SwathHeightY, + mode_lib->vba.SwathHeightC, + mode_lib->vba.DETBufferSizeInKByte, + mode_lib->vba.DETBufferSizeY, + mode_lib->vba.DETBufferSizeC, + &v->UnboundedRequestEnabled, + &v->CompressedBufferSizeInkByte, + &v->CompBufReservedSpaceKBytes, + &v->dummy_vars + .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation + .dummy_boolean, /* bool *CompBufReservedSpaceNeedAjustment */ + v->dummy_vars + .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation + .dummy_boolean_array, /* bool ViewportSizeSupportPerSurface[] */ + &v->dummy_vars + .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation + .dummy_boolean); /* bool *ViewportSizeSupport */ + } + + v->CompBufReservedSpaceZs = v->CompBufReservedSpaceKBytes * 1024.0 / 256.0; + v->CompBufReservedSpace64B = v->CompBufReservedSpaceKBytes * 1024.0 / 64.0; + + // DCFCLK Deep Sleep + dml32_CalculateDCFCLKDeepSleep( + mode_lib->vba.NumberOfActiveSurfaces, + v->BytePerPixelY, + v->BytePerPixelC, + mode_lib->vba.VRatio, + mode_lib->vba.VRatioChroma, + v->SwathWidthY, + v->SwathWidthC, + mode_lib->vba.DPPPerPlane, + mode_lib->vba.HRatio, + mode_lib->vba.HRatioChroma, + mode_lib->vba.PixelClock, + v->PSCL_THROUGHPUT_LUMA, + v->PSCL_THROUGHPUT_CHROMA, + mode_lib->vba.DPPCLK, + v->ReadBandwidthSurfaceLuma, + v->ReadBandwidthSurfaceChroma, + mode_lib->vba.ReturnBusWidth, + + /* Output */ + &v->DCFCLKDeepSleep); + + // DSCCLK + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { + if ((mode_lib->vba.BlendingAndTiming[k] != k) || !mode_lib->vba.DSCEnabled[k]) { + v->DSCCLK_calculated[k] = 0.0; + } else { + if (mode_lib->vba.OutputFormat[k] == dm_420) + mode_lib->vba.DSCFormatFactor = 2; + else if (mode_lib->vba.OutputFormat[k] == dm_444) + mode_lib->vba.DSCFormatFactor = 1; + else if (mode_lib->vba.OutputFormat[k] == dm_n422) + mode_lib->vba.DSCFormatFactor = 2; + else + mode_lib->vba.DSCFormatFactor = 1; + if (mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) + v->DSCCLK_calculated[k] = mode_lib->vba.PixelClockBackEnd[k] / 12 + / mode_lib->vba.DSCFormatFactor + / (1 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100); + else if (mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) + v->DSCCLK_calculated[k] = mode_lib->vba.PixelClockBackEnd[k] / 6 + / mode_lib->vba.DSCFormatFactor + / (1 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100); + else + v->DSCCLK_calculated[k] = mode_lib->vba.PixelClockBackEnd[k] / 3 + / mode_lib->vba.DSCFormatFactor + / (1 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100); + } + } + + // DSC Delay + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { + v->DSCDelay[k] = dml32_DSCDelayRequirement(mode_lib->vba.DSCEnabled[k], + mode_lib->vba.ODMCombineEnabled[k], mode_lib->vba.DSCInputBitPerComponent[k], + mode_lib->vba.OutputBppPerState[mode_lib->vba.VoltageLevel][k], + mode_lib->vba.HActive[k], mode_lib->vba.HTotal[k], + mode_lib->vba.NumberOfDSCSlices[k], mode_lib->vba.OutputFormat[k], + mode_lib->vba.Output[k], mode_lib->vba.PixelClock[k], + mode_lib->vba.PixelClockBackEnd[k], mode_lib->vba.ip.dsc_delay_factor_wa); + } + + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) + for (j = 0; j < mode_lib->vba.NumberOfActiveSurfaces; ++j) // NumberOfSurfaces + if (j != k && mode_lib->vba.BlendingAndTiming[k] == j && mode_lib->vba.DSCEnabled[j]) + v->DSCDelay[k] = v->DSCDelay[j]; + + //Immediate Flip + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { + v->ImmediateFlipSupportedSurface[k] = mode_lib->vba.ImmediateFlipSupport + && (mode_lib->vba.ImmediateFlipRequirement[k] != dm_immediate_flip_not_required); + } + + // Prefetch + dml32_CalculateSurfaceSizeInMall( + mode_lib->vba.NumberOfActiveSurfaces, + mode_lib->vba.MALLAllocatedForDCNFinal, + mode_lib->vba.UseMALLForStaticScreen, + mode_lib->vba.DCCEnable, + mode_lib->vba.ViewportStationary, + mode_lib->vba.ViewportXStartY, + mode_lib->vba.ViewportYStartY, + mode_lib->vba.ViewportXStartC, + mode_lib->vba.ViewportYStartC, + mode_lib->vba.ViewportWidth, + mode_lib->vba.ViewportHeight, + v->BytePerPixelY, + mode_lib->vba.ViewportWidthChroma, + mode_lib->vba.ViewportHeightChroma, + v->BytePerPixelC, + mode_lib->vba.SurfaceWidthY, + mode_lib->vba.SurfaceWidthC, + mode_lib->vba.SurfaceHeightY, + mode_lib->vba.SurfaceHeightC, + v->BlockWidth256BytesY, + v->BlockWidth256BytesC, + v->BlockHeight256BytesY, + v->BlockHeight256BytesC, + v->BlockWidthY, + v->BlockWidthC, + v->BlockHeightY, + v->BlockHeightC, + + /* Output */ + v->SurfaceSizeInMALL, + &v->dummy_vars. + DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation + .dummy_boolean2); /* Boolean *ExceededMALLSize */ + + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].PixelClock = mode_lib->vba.PixelClock[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].DPPPerSurface = mode_lib->vba.DPPPerPlane[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].SourceRotation = mode_lib->vba.SourceRotation[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].ViewportHeight = mode_lib->vba.ViewportHeight[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].ViewportHeightChroma = mode_lib->vba.ViewportHeightChroma[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].BlockWidth256BytesY = v->BlockWidth256BytesY[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].BlockHeight256BytesY = v->BlockHeight256BytesY[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].BlockWidth256BytesC = v->BlockWidth256BytesC[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].BlockHeight256BytesC = v->BlockHeight256BytesC[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].BlockWidthY = v->BlockWidthY[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].BlockHeightY = v->BlockHeightY[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].BlockWidthC = v->BlockWidthC[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].BlockHeightC = v->BlockHeightC[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].InterlaceEnable = mode_lib->vba.Interlace[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].HTotal = mode_lib->vba.HTotal[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].DCCEnable = mode_lib->vba.DCCEnable[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].SourcePixelFormat = mode_lib->vba.SourcePixelFormat[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].SurfaceTiling = mode_lib->vba.SurfaceTiling[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].BytePerPixelY = v->BytePerPixelY[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].BytePerPixelC = v->BytePerPixelC[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].ProgressiveToInterlaceUnitInOPP = mode_lib->vba.ProgressiveToInterlaceUnitInOPP; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].VRatio = mode_lib->vba.VRatio[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].VRatioChroma = mode_lib->vba.VRatioChroma[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].VTaps = mode_lib->vba.vtaps[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].VTapsChroma = mode_lib->vba.VTAPsChroma[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].PitchY = mode_lib->vba.PitchY[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].DCCMetaPitchY = mode_lib->vba.DCCMetaPitchY[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].PitchC = mode_lib->vba.PitchC[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].DCCMetaPitchC = mode_lib->vba.DCCMetaPitchC[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].ViewportStationary = mode_lib->vba.ViewportStationary[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].ViewportXStart = mode_lib->vba.ViewportXStartY[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].ViewportYStart = mode_lib->vba.ViewportYStartY[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].ViewportXStartC = mode_lib->vba.ViewportXStartC[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].ViewportYStartC = mode_lib->vba.ViewportYStartC[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].FORCE_ONE_ROW_FOR_FRAME = mode_lib->vba.ForceOneRowForFrame[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].SwathHeightY = mode_lib->vba.SwathHeightY[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].SwathHeightC = mode_lib->vba.SwathHeightC[k]; + } + + { + + dml32_CalculateVMRowAndSwath( + mode_lib->vba.NumberOfActiveSurfaces, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters, + v->SurfaceSizeInMALL, + mode_lib->vba.PTEBufferSizeInRequestsLuma, + mode_lib->vba.PTEBufferSizeInRequestsChroma, + mode_lib->vba.DCCMetaBufferSizeBytes, + mode_lib->vba.UseMALLForStaticScreen, + mode_lib->vba.UsesMALLForPStateChange, + mode_lib->vba.MALLAllocatedForDCNFinal, + v->SwathWidthY, + v->SwathWidthC, + mode_lib->vba.GPUVMEnable, + mode_lib->vba.HostVMEnable, + mode_lib->vba.HostVMMaxNonCachedPageTableLevels, + mode_lib->vba.GPUVMMaxPageTableLevels, + mode_lib->vba.GPUVMMinPageSizeKBytes, + mode_lib->vba.HostVMMinPageSize, + + /* Output */ + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_boolean_array2[0], // Boolean PTEBufferSizeNotExceeded[] + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_boolean_array2[1], // Boolean DCCMetaBufferSizeNotExceeded[] + v->dpte_row_width_luma_ub, + v->dpte_row_width_chroma_ub, + v->dpte_row_height, + v->dpte_row_height_chroma, + v->dpte_row_height_linear, + v->dpte_row_height_linear_chroma, + v->meta_req_width, + v->meta_req_width_chroma, + v->meta_req_height, + v->meta_req_height_chroma, + v->meta_row_width, + v->meta_row_width_chroma, + v->meta_row_height, + v->meta_row_height_chroma, + v->vm_group_bytes, + v->dpte_group_bytes, + v->PixelPTEReqWidthY, + v->PixelPTEReqHeightY, + v->PTERequestSizeY, + v->PixelPTEReqWidthC, + v->PixelPTEReqHeightC, + v->PTERequestSizeC, + v->dpde0_bytes_per_frame_ub_l, + v->meta_pte_bytes_per_frame_ub_l, + v->dpde0_bytes_per_frame_ub_c, + v->meta_pte_bytes_per_frame_ub_c, + v->PrefetchSourceLinesY, + v->PrefetchSourceLinesC, + v->VInitPreFillY, v->VInitPreFillC, + v->MaxNumSwathY, + v->MaxNumSwathC, + v->meta_row_bw, + v->dpte_row_bw, + v->PixelPTEBytesPerRow, + v->PDEAndMetaPTEBytesFrame, + v->MetaRowByte, + v->Use_One_Row_For_Frame, + v->Use_One_Row_For_Frame_Flip, + v->UsesMALLForStaticScreen, + v->PTE_BUFFER_MODE, + v->BIGK_FRAGMENT_SIZE); + } + + + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.ReorderBytes = mode_lib->vba.NumberOfChannels + * dml_max3(mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelDataOnly, + mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData, + mode_lib->vba.UrgentOutOfOrderReturnPerChannelVMDataOnly); + + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.VMDataOnlyReturnBW = dml32_get_return_bw_mbps_vm_only( + &mode_lib->vba.soc, + mode_lib->vba.VoltageLevel, + mode_lib->vba.DCFCLK, + mode_lib->vba.FabricClock, + mode_lib->vba.DRAMSpeed); + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: mode_lib->vba.ReturnBusWidth = %f\n", __func__, mode_lib->vba.ReturnBusWidth); + dml_print("DML::%s: mode_lib->vba.DCFCLK = %f\n", __func__, mode_lib->vba.DCFCLK); + dml_print("DML::%s: mode_lib->vba.FabricClock = %f\n", __func__, mode_lib->vba.FabricClock); + dml_print("DML::%s: mode_lib->vba.FabricDatapathToDCNDataReturn = %f\n", __func__, + mode_lib->vba.FabricDatapathToDCNDataReturn); + dml_print("DML::%s: mode_lib->vba.PercentOfIdealSDPPortBWReceivedAfterUrgLatency = %f\n", + __func__, mode_lib->vba.PercentOfIdealSDPPortBWReceivedAfterUrgLatency); + dml_print("DML::%s: mode_lib->vba.DRAMSpeed = %f\n", __func__, mode_lib->vba.DRAMSpeed); + dml_print("DML::%s: mode_lib->vba.NumberOfChannels = %f\n", __func__, mode_lib->vba.NumberOfChannels); + dml_print("DML::%s: mode_lib->vba.DRAMChannelWidth = %f\n", __func__, mode_lib->vba.DRAMChannelWidth); + dml_print("DML::%s: mode_lib->vba.PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly = %f\n", + __func__, mode_lib->vba.PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly); + dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW); + dml_print("DML::%s: ReturnBW = %f\n", __func__, mode_lib->vba.ReturnBW); +#endif + + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.HostVMInefficiencyFactor = 1.0; + + if (mode_lib->vba.GPUVMEnable && mode_lib->vba.HostVMEnable) + v->dummy_vars + .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation + .HostVMInefficiencyFactor = + mode_lib->vba.ReturnBW / v->dummy_vars + .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation + .VMDataOnlyReturnBW; + + mode_lib->vba.TotalDCCActiveDPP = 0; + mode_lib->vba.TotalActiveDPP = 0; + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { + mode_lib->vba.TotalActiveDPP = mode_lib->vba.TotalActiveDPP + mode_lib->vba.DPPPerPlane[k]; + if (mode_lib->vba.DCCEnable[k]) + mode_lib->vba.TotalDCCActiveDPP = mode_lib->vba.TotalDCCActiveDPP + + mode_lib->vba.DPPPerPlane[k]; + } + + v->UrgentExtraLatency = dml32_CalculateExtraLatency( + mode_lib->vba.RoundTripPingLatencyCycles, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.ReorderBytes, + mode_lib->vba.DCFCLK, + mode_lib->vba.TotalActiveDPP, + mode_lib->vba.PixelChunkSizeInKByte, + mode_lib->vba.TotalDCCActiveDPP, + mode_lib->vba.MetaChunkSize, + mode_lib->vba.ReturnBW, + mode_lib->vba.GPUVMEnable, + mode_lib->vba.HostVMEnable, + mode_lib->vba.NumberOfActiveSurfaces, + mode_lib->vba.DPPPerPlane, + v->dpte_group_bytes, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.HostVMInefficiencyFactor, + mode_lib->vba.HostVMMinPageSize, + mode_lib->vba.HostVMMaxNonCachedPageTableLevels); + + mode_lib->vba.TCalc = 24.0 / v->DCFCLKDeepSleep; + + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { + if (mode_lib->vba.BlendingAndTiming[k] == k) { + if (mode_lib->vba.WritebackEnable[k] == true) { + v->WritebackDelay[mode_lib->vba.VoltageLevel][k] = mode_lib->vba.WritebackLatency + + dml32_CalculateWriteBackDelay( + mode_lib->vba.WritebackPixelFormat[k], + mode_lib->vba.WritebackHRatio[k], + mode_lib->vba.WritebackVRatio[k], + mode_lib->vba.WritebackVTaps[k], + mode_lib->vba.WritebackDestinationWidth[k], + mode_lib->vba.WritebackDestinationHeight[k], + mode_lib->vba.WritebackSourceHeight[k], + mode_lib->vba.HTotal[k]) / mode_lib->vba.DISPCLK; + } else + v->WritebackDelay[mode_lib->vba.VoltageLevel][k] = 0; + for (j = 0; j < mode_lib->vba.NumberOfActiveSurfaces; ++j) { + if (mode_lib->vba.BlendingAndTiming[j] == k && + mode_lib->vba.WritebackEnable[j] == true) { + v->WritebackDelay[mode_lib->vba.VoltageLevel][k] = + dml_max(v->WritebackDelay[mode_lib->vba.VoltageLevel][k], + mode_lib->vba.WritebackLatency + + dml32_CalculateWriteBackDelay( + mode_lib->vba.WritebackPixelFormat[j], + mode_lib->vba.WritebackHRatio[j], + mode_lib->vba.WritebackVRatio[j], + mode_lib->vba.WritebackVTaps[j], + mode_lib->vba.WritebackDestinationWidth[j], + mode_lib->vba.WritebackDestinationHeight[j], + mode_lib->vba.WritebackSourceHeight[j], + mode_lib->vba.HTotal[k]) / mode_lib->vba.DISPCLK); + } + } + } + } + + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) + for (j = 0; j < mode_lib->vba.NumberOfActiveSurfaces; ++j) + if (mode_lib->vba.BlendingAndTiming[k] == j) + v->WritebackDelay[mode_lib->vba.VoltageLevel][k] = + v->WritebackDelay[mode_lib->vba.VoltageLevel][j]; + + v->UrgentLatency = dml32_CalculateUrgentLatency(mode_lib->vba.UrgentLatencyPixelDataOnly, + mode_lib->vba.UrgentLatencyPixelMixedWithVMData, + mode_lib->vba.UrgentLatencyVMDataOnly, + mode_lib->vba.DoUrgentLatencyAdjustment, + mode_lib->vba.UrgentLatencyAdjustmentFabricClockComponent, + mode_lib->vba.UrgentLatencyAdjustmentFabricClockReference, + mode_lib->vba.FabricClock); + + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { + dml32_CalculateUrgentBurstFactor(mode_lib->vba.UsesMALLForPStateChange[k], + v->swath_width_luma_ub[k], + v->swath_width_chroma_ub[k], + mode_lib->vba.SwathHeightY[k], + mode_lib->vba.SwathHeightC[k], + mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k], + v->UrgentLatency, + mode_lib->vba.CursorBufferSize, + mode_lib->vba.CursorWidth[k][0], + mode_lib->vba.CursorBPP[k][0], + mode_lib->vba.VRatio[k], + mode_lib->vba.VRatioChroma[k], + v->BytePerPixelDETY[k], + v->BytePerPixelDETC[k], + mode_lib->vba.DETBufferSizeY[k], + mode_lib->vba.DETBufferSizeC[k], + + /* output */ + &v->UrgBurstFactorCursor[k], + &v->UrgBurstFactorLuma[k], + &v->UrgBurstFactorChroma[k], + &v->NoUrgentLatencyHiding[k]); + + v->cursor_bw[k] = mode_lib->vba.NumberOfCursors[k] * mode_lib->vba.CursorWidth[k][0] * mode_lib->vba.CursorBPP[k][0] / 8 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k]; + } + + v->NotEnoughDETSwathFillLatencyHiding = dml32_CalculateDETSwathFillLatencyHiding( + mode_lib->vba.NumberOfActiveSurfaces, + mode_lib->vba.ReturnBW, + v->UrgentLatency, + mode_lib->vba.SwathHeightY, + mode_lib->vba.SwathHeightC, + v->swath_width_luma_ub, + v->swath_width_chroma_ub, + v->BytePerPixelDETY, + v->BytePerPixelDETC, + mode_lib->vba.DETBufferSizeY, + mode_lib->vba.DETBufferSizeC, + mode_lib->vba.DPPPerPlane, + mode_lib->vba.HTotal, + mode_lib->vba.PixelClock, + mode_lib->vba.VRatio, + mode_lib->vba.VRatioChroma, + mode_lib->vba.UsesMALLForPStateChange); + + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { + v->MaxVStartupLines[k] = ((mode_lib->vba.Interlace[k] && + !mode_lib->vba.ProgressiveToInterlaceUnitInOPP) ? + dml_floor((mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k]) / 2.0, 1.0) : + mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k]) - dml_max(1.0, + dml_ceil((double) v->WritebackDelay[mode_lib->vba.VoltageLevel][k] + / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]), 1)); + + // Clamp to max OTG vstartup register limit + if (v->MaxVStartupLines[k] > 1023) + v->MaxVStartupLines[k] = 1023; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%d MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]); + dml_print("DML::%s: k=%d VoltageLevel = %d\n", __func__, k, mode_lib->vba.VoltageLevel); + dml_print("DML::%s: k=%d WritebackDelay = %f\n", __func__, + k, v->WritebackDelay[mode_lib->vba.VoltageLevel][k]); +#endif + } + + v->MaximumMaxVStartupLines = 0; + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) + v->MaximumMaxVStartupLines = dml_max(v->MaximumMaxVStartupLines, v->MaxVStartupLines[k]); + + ImmediateFlipRequirementFinal = false; + + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { + ImmediateFlipRequirementFinal = ImmediateFlipRequirementFinal + || (mode_lib->vba.ImmediateFlipRequirement[k] == dm_immediate_flip_required); + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: ImmediateFlipRequirementFinal = %d\n", __func__, ImmediateFlipRequirementFinal); +#endif + // ModeProgramming will not repeat the schedule calculation using different prefetch mode, + //it is just calcualated once with given prefetch mode + dml32_CalculateMinAndMaxPrefetchMode( + mode_lib->vba.AllowForPStateChangeOrStutterInVBlankFinal, + &mode_lib->vba.MinPrefetchMode, + &mode_lib->vba.MaxPrefetchMode); + + v->VStartupLines = __DML_VBA_MIN_VSTARTUP__; + + iteration = 0; + MaxTotalRDBandwidth = 0; + NextPrefetchMode = mode_lib->vba.PrefetchModePerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb]; + + do { + MaxTotalRDBandwidth = 0; + DestinationLineTimesForPrefetchLessThan2 = false; + VRatioPrefetchMoreThanMax = false; +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: Start loop: VStartup = %d\n", __func__, mode_lib->vba.VStartupLines); +#endif + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { + /* NOTE PerfetchMode variable is invalid in DAL as per the input received. + * Hence the direction is to use PrefetchModePerState. + */ + TWait = dml32_CalculateTWait( + mode_lib->vba.PrefetchModePerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb], + mode_lib->vba.UsesMALLForPStateChange[k], + mode_lib->vba.SynchronizeDRRDisplaysForUCLKPStateChangeFinal, + mode_lib->vba.DRRDisplay[k], + mode_lib->vba.DRAMClockChangeLatency, + mode_lib->vba.FCLKChangeLatency, v->UrgentLatency, + mode_lib->vba.SREnterPlusExitTime); + + memset(&v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe, 0, sizeof(DmlPipe)); + + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.Dppclk = mode_lib->vba.DPPCLK[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.Dispclk = mode_lib->vba.DISPCLK; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.PixelClock = mode_lib->vba.PixelClock[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.DCFClkDeepSleep = v->DCFCLKDeepSleep; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.DPPPerSurface = mode_lib->vba.DPPPerPlane[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.ScalerEnabled = mode_lib->vba.ScalerEnabled[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.SourceRotation = mode_lib->vba.SourceRotation[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.InterlaceEnable = mode_lib->vba.Interlace[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.NumberOfCursors = mode_lib->vba.NumberOfCursors[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.VBlank = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.HTotal = mode_lib->vba.HTotal[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.HActive = mode_lib->vba.HActive[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.DCCEnable = mode_lib->vba.DCCEnable[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.ODMMode = mode_lib->vba.ODMCombineEnabled[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.SourcePixelFormat = mode_lib->vba.SourcePixelFormat[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BytePerPixelY = v->BytePerPixelY[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BytePerPixelC = v->BytePerPixelC[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.ProgressiveToInterlaceUnitInOPP = mode_lib->vba.ProgressiveToInterlaceUnitInOPP; + v->ErrorResult[k] = dml32_CalculatePrefetchSchedule( + v, + k, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.HostVMInefficiencyFactor, + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe, + v->DSCDelay[k], + (unsigned int) (v->SwathWidthY[k] / v->HRatio[k]), + dml_min(v->VStartupLines, v->MaxVStartupLines[k]), + v->MaxVStartupLines[k], + v->UrgentLatency, + v->UrgentExtraLatency, + v->TCalc, + v->PDEAndMetaPTEBytesFrame[k], + v->MetaRowByte[k], + v->PixelPTEBytesPerRow[k], + v->PrefetchSourceLinesY[k], + v->SwathWidthY[k], + v->VInitPreFillY[k], + v->MaxNumSwathY[k], + v->PrefetchSourceLinesC[k], + v->SwathWidthC[k], + v->VInitPreFillC[k], + v->MaxNumSwathC[k], + v->swath_width_luma_ub[k], + v->swath_width_chroma_ub[k], + v->SwathHeightY[k], + v->SwathHeightC[k], + TWait, + (v->DRAMSpeedPerState[mode_lib->vba.VoltageLevel] <= MEM_STROBE_FREQ_MHZ || + v->DCFCLKPerState[mode_lib->vba.VoltageLevel] <= DCFCLK_FREQ_EXTRA_PREFETCH_REQ_MHZ) ? + mode_lib->vba.ip.min_prefetch_in_strobe_us : 0, + /* Output */ + &v->DSTXAfterScaler[k], + &v->DSTYAfterScaler[k], + &v->DestinationLinesForPrefetch[k], + &v->PrefetchBandwidth[k], + &v->DestinationLinesToRequestVMInVBlank[k], + &v->DestinationLinesToRequestRowInVBlank[k], + &v->VRatioPrefetchY[k], + &v->VRatioPrefetchC[k], + &v->RequiredPrefetchPixDataBWLuma[k], + &v->RequiredPrefetchPixDataBWChroma[k], + &v->NotEnoughTimeForDynamicMetadata[k], + &v->Tno_bw[k], &v->prefetch_vmrow_bw[k], + &v->Tdmdl_vm[k], + &v->Tdmdl[k], + &v->TSetup[k], + &v->VUpdateOffsetPix[k], + &v->VUpdateWidthPix[k], + &v->VReadyOffsetPix[k]); + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%0d Prefetch calculation errResult=%0d\n", + __func__, k, mode_lib->vba.ErrorResult[k]); +#endif + v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[k]); + } + + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { + dml32_CalculateUrgentBurstFactor(mode_lib->vba.UsesMALLForPStateChange[k], + v->swath_width_luma_ub[k], + v->swath_width_chroma_ub[k], + mode_lib->vba.SwathHeightY[k], + mode_lib->vba.SwathHeightC[k], + mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k], + v->UrgentLatency, + mode_lib->vba.CursorBufferSize, + mode_lib->vba.CursorWidth[k][0], + mode_lib->vba.CursorBPP[k][0], + v->VRatioPrefetchY[k], + v->VRatioPrefetchC[k], + v->BytePerPixelDETY[k], + v->BytePerPixelDETC[k], + mode_lib->vba.DETBufferSizeY[k], + mode_lib->vba.DETBufferSizeC[k], + /* Output */ + &v->UrgBurstFactorCursorPre[k], + &v->UrgBurstFactorLumaPre[k], + &v->UrgBurstFactorChromaPre[k], + &v->NoUrgentLatencyHidingPre[k]); + + v->cursor_bw_pre[k] = mode_lib->vba.NumberOfCursors[k] * mode_lib->vba.CursorWidth[k][0] * mode_lib->vba.CursorBPP[k][0] / + 8.0 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * v->VRatioPrefetchY[k]; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%0d DPPPerSurface=%d\n", __func__, k, mode_lib->vba.DPPPerPlane[k]); + dml_print("DML::%s: k=%0d UrgBurstFactorLuma=%f\n", __func__, k, v->UrgBurstFactorLuma[k]); + dml_print("DML::%s: k=%0d UrgBurstFactorChroma=%f\n", __func__, k, v->UrgBurstFactorChroma[k]); + dml_print("DML::%s: k=%0d UrgBurstFactorLumaPre=%f\n", __func__, k, + v->UrgBurstFactorLumaPre[k]); + dml_print("DML::%s: k=%0d UrgBurstFactorChromaPre=%f\n", __func__, k, + v->UrgBurstFactorChromaPre[k]); + + dml_print("DML::%s: k=%0d VRatioPrefetchY=%f\n", __func__, k, v->VRatioPrefetchY[k]); + dml_print("DML::%s: k=%0d VRatioY=%f\n", __func__, k, mode_lib->vba.VRatio[k]); + + dml_print("DML::%s: k=%0d prefetch_vmrow_bw=%f\n", __func__, k, v->prefetch_vmrow_bw[k]); + dml_print("DML::%s: k=%0d ReadBandwidthSurfaceLuma=%f\n", __func__, k, + v->ReadBandwidthSurfaceLuma[k]); + dml_print("DML::%s: k=%0d ReadBandwidthSurfaceChroma=%f\n", __func__, k, + v->ReadBandwidthSurfaceChroma[k]); + dml_print("DML::%s: k=%0d cursor_bw=%f\n", __func__, k, v->cursor_bw[k]); + dml_print("DML::%s: k=%0d meta_row_bw=%f\n", __func__, k, v->meta_row_bw[k]); + dml_print("DML::%s: k=%0d dpte_row_bw=%f\n", __func__, k, v->dpte_row_bw[k]); + dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWLuma=%f\n", __func__, k, + v->RequiredPrefetchPixDataBWLuma[k]); + dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWChroma=%f\n", __func__, k, + v->RequiredPrefetchPixDataBWChroma[k]); + dml_print("DML::%s: k=%0d cursor_bw_pre=%f\n", __func__, k, v->cursor_bw_pre[k]); + dml_print("DML::%s: k=%0d MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, k, + MaxTotalRDBandwidthNoUrgentBurst); +#endif + if (v->DestinationLinesForPrefetch[k] < 2) + DestinationLineTimesForPrefetchLessThan2 = true; + + if (v->VRatioPrefetchY[k] > __DML_MAX_VRATIO_PRE__ + || v->VRatioPrefetchC[k] > __DML_MAX_VRATIO_PRE__) + VRatioPrefetchMoreThanMax = true; + + //bool DestinationLinesToRequestVMInVBlankEqualOrMoreThan32 = false; + //bool DestinationLinesToRequestRowInVBlankEqualOrMoreThan16 = false; + //if (v->DestinationLinesToRequestVMInVBlank[k] >= 32) { + // DestinationLinesToRequestVMInVBlankEqualOrMoreThan32 = true; + //} + + //if (v->DestinationLinesToRequestRowInVBlank[k] >= 16) { + // DestinationLinesToRequestRowInVBlankEqualOrMoreThan16 = true; + //} + } + + v->FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / mode_lib->vba.ReturnBW; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: MaxTotalRDBandwidthNoUrgentBurst=%f\n", + __func__, MaxTotalRDBandwidthNoUrgentBurst); + dml_print("DML::%s: ReturnBW=%f\n", __func__, mode_lib->vba.ReturnBW); + dml_print("DML::%s: FractionOfUrgentBandwidth=%f\n", + __func__, mode_lib->vba.FractionOfUrgentBandwidth); +#endif + + { + dml32_CalculatePrefetchBandwithSupport( + mode_lib->vba.NumberOfActiveSurfaces, + mode_lib->vba.ReturnBW, + v->NoUrgentLatencyHidingPre, + v->ReadBandwidthSurfaceLuma, + v->ReadBandwidthSurfaceChroma, + v->RequiredPrefetchPixDataBWLuma, + v->RequiredPrefetchPixDataBWChroma, + v->cursor_bw, + v->meta_row_bw, + v->dpte_row_bw, + v->cursor_bw_pre, + v->prefetch_vmrow_bw, + mode_lib->vba.DPPPerPlane, + v->UrgBurstFactorLuma, + v->UrgBurstFactorChroma, + v->UrgBurstFactorCursor, + v->UrgBurstFactorLumaPre, + v->UrgBurstFactorChromaPre, + v->UrgBurstFactorCursorPre, + + /* output */ + &MaxTotalRDBandwidth, + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_single[0], + &v->PrefetchModeSupported); + } + + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector[k] = 1.0; + + { + dml32_CalculatePrefetchBandwithSupport(mode_lib->vba.NumberOfActiveSurfaces, + mode_lib->vba.ReturnBW, + v->NoUrgentLatencyHidingPre, + v->ReadBandwidthSurfaceLuma, + v->ReadBandwidthSurfaceChroma, + v->RequiredPrefetchPixDataBWLuma, + v->RequiredPrefetchPixDataBWChroma, + v->cursor_bw, + v->meta_row_bw, + v->dpte_row_bw, + v->cursor_bw_pre, + v->prefetch_vmrow_bw, + mode_lib->vba.DPPPerPlane, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, + + /* output */ + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_single[0], + &v->FractionOfUrgentBandwidth, + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_boolean); + } + + if (VRatioPrefetchMoreThanMax != false || DestinationLineTimesForPrefetchLessThan2 != false) { + v->PrefetchModeSupported = false; + } + + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { + if (v->ErrorResult[k] == true || v->NotEnoughTimeForDynamicMetadata[k]) { + v->PrefetchModeSupported = false; + } + } + + if (v->PrefetchModeSupported == true && mode_lib->vba.ImmediateFlipSupport == true) { + mode_lib->vba.BandwidthAvailableForImmediateFlip = dml32_CalculateBandwidthAvailableForImmediateFlip( + mode_lib->vba.NumberOfActiveSurfaces, + mode_lib->vba.ReturnBW, + v->ReadBandwidthSurfaceLuma, + v->ReadBandwidthSurfaceChroma, + v->RequiredPrefetchPixDataBWLuma, + v->RequiredPrefetchPixDataBWChroma, + v->cursor_bw, + v->cursor_bw_pre, + mode_lib->vba.DPPPerPlane, + v->UrgBurstFactorLuma, + v->UrgBurstFactorChroma, + v->UrgBurstFactorCursor, + v->UrgBurstFactorLumaPre, + v->UrgBurstFactorChromaPre, + v->UrgBurstFactorCursorPre); + + mode_lib->vba.TotImmediateFlipBytes = 0; + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { + if (mode_lib->vba.ImmediateFlipRequirement[k] != dm_immediate_flip_not_required) { + mode_lib->vba.TotImmediateFlipBytes = mode_lib->vba.TotImmediateFlipBytes + + mode_lib->vba.DPPPerPlane[k] + * (v->PDEAndMetaPTEBytesFrame[k] + + v->MetaRowByte[k]); + if (v->use_one_row_for_frame_flip[k][0][0]) { + mode_lib->vba.TotImmediateFlipBytes = + mode_lib->vba.TotImmediateFlipBytes + + 2 * v->PixelPTEBytesPerRow[k]; + } else { + mode_lib->vba.TotImmediateFlipBytes = + mode_lib->vba.TotImmediateFlipBytes + + v->PixelPTEBytesPerRow[k]; + } + } + } + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { + dml32_CalculateFlipSchedule(v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.HostVMInefficiencyFactor, + v->UrgentExtraLatency, + v->UrgentLatency, + mode_lib->vba.GPUVMMaxPageTableLevels, + mode_lib->vba.HostVMEnable, + mode_lib->vba.HostVMMaxNonCachedPageTableLevels, + mode_lib->vba.GPUVMEnable, + mode_lib->vba.HostVMMinPageSize, + v->PDEAndMetaPTEBytesFrame[k], + v->MetaRowByte[k], + v->PixelPTEBytesPerRow[k], + mode_lib->vba.BandwidthAvailableForImmediateFlip, + mode_lib->vba.TotImmediateFlipBytes, + mode_lib->vba.SourcePixelFormat[k], + mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k], + mode_lib->vba.VRatio[k], + mode_lib->vba.VRatioChroma[k], + v->Tno_bw[k], + mode_lib->vba.DCCEnable[k], + v->dpte_row_height[k], + v->meta_row_height[k], + v->dpte_row_height_chroma[k], + v->meta_row_height_chroma[k], + v->Use_One_Row_For_Frame_Flip[k], + + /* Output */ + &v->DestinationLinesToRequestVMInImmediateFlip[k], + &v->DestinationLinesToRequestRowInImmediateFlip[k], + &v->final_flip_bw[k], + &v->ImmediateFlipSupportedForPipe[k]); + } + + { + dml32_CalculateImmediateFlipBandwithSupport(mode_lib->vba.NumberOfActiveSurfaces, + mode_lib->vba.ReturnBW, + mode_lib->vba.ImmediateFlipRequirement, + v->final_flip_bw, + v->ReadBandwidthSurfaceLuma, + v->ReadBandwidthSurfaceChroma, + v->RequiredPrefetchPixDataBWLuma, + v->RequiredPrefetchPixDataBWChroma, + v->cursor_bw, + v->meta_row_bw, + v->dpte_row_bw, + v->cursor_bw_pre, + v->prefetch_vmrow_bw, + mode_lib->vba.DPPPerPlane, + v->UrgBurstFactorLuma, + v->UrgBurstFactorChroma, + v->UrgBurstFactorCursor, + v->UrgBurstFactorLumaPre, + v->UrgBurstFactorChromaPre, + v->UrgBurstFactorCursorPre, + + /* output */ + &v->total_dcn_read_bw_with_flip, // Single *TotalBandwidth + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_single[0], // Single *FractionOfUrgentBandwidth + &v->ImmediateFlipSupported); // Boolean *ImmediateFlipBandwidthSupport + + dml32_CalculateImmediateFlipBandwithSupport(mode_lib->vba.NumberOfActiveSurfaces, + mode_lib->vba.ReturnBW, + mode_lib->vba.ImmediateFlipRequirement, + v->final_flip_bw, + v->ReadBandwidthSurfaceLuma, + v->ReadBandwidthSurfaceChroma, + v->RequiredPrefetchPixDataBWLuma, + v->RequiredPrefetchPixDataBWChroma, + v->cursor_bw, + v->meta_row_bw, + v->dpte_row_bw, + v->cursor_bw_pre, + v->prefetch_vmrow_bw, + mode_lib->vba.DPPPerPlane, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, + + /* output */ + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_single[1], // Single *TotalBandwidth + &v->FractionOfUrgentBandwidthImmediateFlip, // Single *FractionOfUrgentBandwidth + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_boolean); // Boolean *ImmediateFlipBandwidthSupport + } + + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { + if (mode_lib->vba.ImmediateFlipRequirement[k] != dm_immediate_flip_not_required && v->ImmediateFlipSupportedForPipe[k] == false) { + v->ImmediateFlipSupported = false; +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: Pipe %0d not supporting iflip\n", __func__, k); +#endif + } + } + } else { + v->ImmediateFlipSupported = false; + } + + /* consider flip support is okay if the flip bw is ok or (when user does't require a iflip and there is no host vm) */ + v->PrefetchAndImmediateFlipSupported = (v->PrefetchModeSupported == true && + ((!mode_lib->vba.ImmediateFlipSupport && !mode_lib->vba.HostVMEnable && !ImmediateFlipRequirementFinal) || + v->ImmediateFlipSupported)) ? true : false; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: PrefetchModeSupported = %d\n", __func__, locals->PrefetchModeSupported); + for (uint k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) + dml_print("DML::%s: ImmediateFlipRequirement[%d] = %d\n", __func__, k, mode_lib->vba.ImmediateFlipRequirement[k] == dm_immediate_flip_required); + dml_print("DML::%s: ImmediateFlipSupported = %d\n", __func__, locals->ImmediateFlipSupported); + dml_print("DML::%s: ImmediateFlipSupport = %d\n", __func__, mode_lib->vba.ImmediateFlipSupport); + dml_print("DML::%s: HostVMEnable = %d\n", __func__, mode_lib->vba.HostVMEnable); + dml_print("DML::%s: PrefetchAndImmediateFlipSupported = %d\n", __func__, locals->PrefetchAndImmediateFlipSupported); + dml_print("DML::%s: Done loop: Vstartup=%d, Max Vstartup=%d\n", __func__, locals->VStartupLines, locals->MaximumMaxVStartupLines); +#endif + + v->VStartupLines = v->VStartupLines + 1; + + if (v->VStartupLines > v->MaximumMaxVStartupLines) { +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: Vstartup exceeds max vstartup, exiting loop\n", __func__); +#endif + break; // VBA_DELTA: Implementation divergence! Gabe is *still* iterating across prefetch modes which we don't care to do + } + iteration++; + if (iteration > 2500) { +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: too many errors, exit now\n", __func__); + assert(0); +#endif + } + } while (!(v->PrefetchAndImmediateFlipSupported || NextPrefetchMode > mode_lib->vba.MaxPrefetchMode)); + + + if (v->VStartupLines <= v->MaximumMaxVStartupLines) { +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: Good, Prefetch and flip scheduling found solution at VStartupLines=%d\n", __func__, locals->VStartupLines-1); +#endif + } + + + //Watermarks and NB P-State/DRAM Clock Change Support + { + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.UrgentLatency = v->UrgentLatency; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.ExtraLatency = v->UrgentExtraLatency; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.WritebackLatency = mode_lib->vba.WritebackLatency; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.DRAMClockChangeLatency = mode_lib->vba.DRAMClockChangeLatency; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.FCLKChangeLatency = mode_lib->vba.FCLKChangeLatency; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.SRExitTime = mode_lib->vba.SRExitTime; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.SREnterPlusExitTime = mode_lib->vba.SREnterPlusExitTime; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.SRExitZ8Time = mode_lib->vba.SRExitZ8Time; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.SREnterPlusExitZ8Time = mode_lib->vba.SREnterPlusExitZ8Time; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.USRRetrainingLatency = mode_lib->vba.USRRetrainingLatency; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.SMNLatency = mode_lib->vba.SMNLatency; + + dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( + v, + v->PrefetchModePerState[v->VoltageLevel][v->maxMpcComb], + v->DCFCLK, + v->ReturnBW, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters, + v->SOCCLK, + v->DCFCLKDeepSleep, + v->DETBufferSizeY, + v->DETBufferSizeC, + v->SwathHeightY, + v->SwathHeightC, + v->SwathWidthY, + v->SwathWidthC, + v->DPPPerPlane, + v->BytePerPixelDETY, + v->BytePerPixelDETC, + v->DSTXAfterScaler, + v->DSTYAfterScaler, + v->UnboundedRequestEnabled, + v->CompressedBufferSizeInkByte, + + /* Output */ + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_dramchange_support, + v->MaxActiveDRAMClockChangeLatencySupported, + v->SubViewportLinesNeededInMALL, + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_fclkchange_support, + &v->MinActiveFCLKChangeLatencySupported, + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_USRRetrainingSupport, + mode_lib->vba.ActiveDRAMClockChangeLatencyMargin); + + /* DCN32 has a new struct Watermarks (typedef) which is used to store + * calculated WM values. Copy over values from struct to vba varaibles + * to ensure that the DCN32 getters return the correct value. + */ + v->UrgentWatermark = v->Watermark.UrgentWatermark; + v->WritebackUrgentWatermark = v->Watermark.WritebackUrgentWatermark; + v->DRAMClockChangeWatermark = v->Watermark.DRAMClockChangeWatermark; + v->WritebackDRAMClockChangeWatermark = v->Watermark.WritebackDRAMClockChangeWatermark; + v->StutterExitWatermark = v->Watermark.StutterExitWatermark; + v->StutterEnterPlusExitWatermark = v->Watermark.StutterEnterPlusExitWatermark; + v->Z8StutterExitWatermark = v->Watermark.Z8StutterExitWatermark; + v->Z8StutterEnterPlusExitWatermark = v->Watermark.Z8StutterEnterPlusExitWatermark; + + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { + if (mode_lib->vba.WritebackEnable[k] == true) { + v->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max(0, + v->VStartup[k] * mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k] + - v->Watermark.WritebackDRAMClockChangeWatermark); + v->WritebackAllowFCLKChangeEndPosition[k] = dml_max(0, + v->VStartup[k] * mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k] + - v->Watermark.WritebackFCLKChangeWatermark); + } else { + v->WritebackAllowDRAMClockChangeEndPosition[k] = 0; + v->WritebackAllowFCLKChangeEndPosition[k] = 0; + } + } + } + + //Display Pipeline Delivery Time in Prefetch, Groups + dml32_CalculatePixelDeliveryTimes( + mode_lib->vba.NumberOfActiveSurfaces, + mode_lib->vba.VRatio, + mode_lib->vba.VRatioChroma, + v->VRatioPrefetchY, + v->VRatioPrefetchC, + v->swath_width_luma_ub, + v->swath_width_chroma_ub, + mode_lib->vba.DPPPerPlane, + mode_lib->vba.HRatio, + mode_lib->vba.HRatioChroma, + mode_lib->vba.PixelClock, + v->PSCL_THROUGHPUT_LUMA, + v->PSCL_THROUGHPUT_CHROMA, + mode_lib->vba.DPPCLK, + v->BytePerPixelC, + mode_lib->vba.SourceRotation, + mode_lib->vba.NumberOfCursors, + mode_lib->vba.CursorWidth, + mode_lib->vba.CursorBPP, + v->BlockWidth256BytesY, + v->BlockHeight256BytesY, + v->BlockWidth256BytesC, + v->BlockHeight256BytesC, + + /* Output */ + v->DisplayPipeLineDeliveryTimeLuma, + v->DisplayPipeLineDeliveryTimeChroma, + v->DisplayPipeLineDeliveryTimeLumaPrefetch, + v->DisplayPipeLineDeliveryTimeChromaPrefetch, + v->DisplayPipeRequestDeliveryTimeLuma, + v->DisplayPipeRequestDeliveryTimeChroma, + v->DisplayPipeRequestDeliveryTimeLumaPrefetch, + v->DisplayPipeRequestDeliveryTimeChromaPrefetch, + v->CursorRequestDeliveryTime, + v->CursorRequestDeliveryTimePrefetch); + + dml32_CalculateMetaAndPTETimes(v->Use_One_Row_For_Frame, + mode_lib->vba.NumberOfActiveSurfaces, + mode_lib->vba.GPUVMEnable, + mode_lib->vba.MetaChunkSize, + mode_lib->vba.MinMetaChunkSizeBytes, + mode_lib->vba.HTotal, + mode_lib->vba.VRatio, + mode_lib->vba.VRatioChroma, + v->DestinationLinesToRequestRowInVBlank, + v->DestinationLinesToRequestRowInImmediateFlip, + mode_lib->vba.DCCEnable, + mode_lib->vba.PixelClock, + v->BytePerPixelY, + v->BytePerPixelC, + mode_lib->vba.SourceRotation, + v->dpte_row_height, + v->dpte_row_height_chroma, + v->meta_row_width, + v->meta_row_width_chroma, + v->meta_row_height, + v->meta_row_height_chroma, + v->meta_req_width, + v->meta_req_width_chroma, + v->meta_req_height, + v->meta_req_height_chroma, + v->dpte_group_bytes, + v->PTERequestSizeY, + v->PTERequestSizeC, + v->PixelPTEReqWidthY, + v->PixelPTEReqHeightY, + v->PixelPTEReqWidthC, + v->PixelPTEReqHeightC, + v->dpte_row_width_luma_ub, + v->dpte_row_width_chroma_ub, + + /* Output */ + v->DST_Y_PER_PTE_ROW_NOM_L, + v->DST_Y_PER_PTE_ROW_NOM_C, + v->DST_Y_PER_META_ROW_NOM_L, + v->DST_Y_PER_META_ROW_NOM_C, + v->TimePerMetaChunkNominal, + v->TimePerChromaMetaChunkNominal, + v->TimePerMetaChunkVBlank, + v->TimePerChromaMetaChunkVBlank, + v->TimePerMetaChunkFlip, + v->TimePerChromaMetaChunkFlip, + v->time_per_pte_group_nom_luma, + v->time_per_pte_group_vblank_luma, + v->time_per_pte_group_flip_luma, + v->time_per_pte_group_nom_chroma, + v->time_per_pte_group_vblank_chroma, + v->time_per_pte_group_flip_chroma); + + dml32_CalculateVMGroupAndRequestTimes( + mode_lib->vba.NumberOfActiveSurfaces, + mode_lib->vba.GPUVMEnable, + mode_lib->vba.GPUVMMaxPageTableLevels, + mode_lib->vba.HTotal, + v->BytePerPixelC, + v->DestinationLinesToRequestVMInVBlank, + v->DestinationLinesToRequestVMInImmediateFlip, + mode_lib->vba.DCCEnable, + mode_lib->vba.PixelClock, + v->dpte_row_width_luma_ub, + v->dpte_row_width_chroma_ub, + v->vm_group_bytes, + v->dpde0_bytes_per_frame_ub_l, + v->dpde0_bytes_per_frame_ub_c, + v->meta_pte_bytes_per_frame_ub_l, + v->meta_pte_bytes_per_frame_ub_c, + + /* Output */ + v->TimePerVMGroupVBlank, + v->TimePerVMGroupFlip, + v->TimePerVMRequestVBlank, + v->TimePerVMRequestFlip); + + // Min TTUVBlank + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { + if (mode_lib->vba.PrefetchModePerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb] == 0) { + v->MinTTUVBlank[k] = dml_max4(v->Watermark.DRAMClockChangeWatermark, + v->Watermark.FCLKChangeWatermark, v->Watermark.StutterEnterPlusExitWatermark, + v->Watermark.UrgentWatermark); + } else if (mode_lib->vba.PrefetchModePerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb] + == 1) { + v->MinTTUVBlank[k] = dml_max3(v->Watermark.FCLKChangeWatermark, + v->Watermark.StutterEnterPlusExitWatermark, v->Watermark.UrgentWatermark); + } else if (mode_lib->vba.PrefetchModePerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb] + == 2) { + v->MinTTUVBlank[k] = dml_max(v->Watermark.StutterEnterPlusExitWatermark, + v->Watermark.UrgentWatermark); + } else { + v->MinTTUVBlank[k] = v->Watermark.UrgentWatermark; + } + if (!mode_lib->vba.DynamicMetadataEnable[k]) + v->MinTTUVBlank[k] = mode_lib->vba.TCalc + v->MinTTUVBlank[k]; + } + + // DCC Configuration + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: Calculate DCC configuration for surface k=%d\n", __func__, k); +#endif + dml32_CalculateDCCConfiguration( + mode_lib->vba.DCCEnable[k], + mode_lib->vba.DCCProgrammingAssumesScanDirectionUnknownFinal, + mode_lib->vba.SourcePixelFormat[k], mode_lib->vba.SurfaceWidthY[k], + mode_lib->vba.SurfaceWidthC[k], + mode_lib->vba.SurfaceHeightY[k], + mode_lib->vba.SurfaceHeightC[k], + mode_lib->vba.nomDETInKByte, + v->BlockHeight256BytesY[k], + v->BlockHeight256BytesC[k], + mode_lib->vba.SurfaceTiling[k], + v->BytePerPixelY[k], + v->BytePerPixelC[k], + v->BytePerPixelDETY[k], + v->BytePerPixelDETC[k], + (enum dm_rotation_angle) mode_lib->vba.SourceScan[k], + /* Output */ + &v->DCCYMaxUncompressedBlock[k], + &v->DCCCMaxUncompressedBlock[k], + &v->DCCYMaxCompressedBlock[k], + &v->DCCCMaxCompressedBlock[k], + &v->DCCYIndependentBlock[k], + &v->DCCCIndependentBlock[k]); + } + + // VStartup Adjustment + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { + bool isInterlaceTiming; + double Tvstartup_margin = (v->MaxVStartupLines[k] - v->VStartup[k]) * mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]; +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%d, MinTTUVBlank = %f (before vstartup margin)\n", __func__, k, + v->MinTTUVBlank[k]); +#endif + + v->MinTTUVBlank[k] = v->MinTTUVBlank[k] + Tvstartup_margin; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%d, Tvstartup_margin = %f\n", __func__, k, Tvstartup_margin); + dml_print("DML::%s: k=%d, MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]); + dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]); + dml_print("DML::%s: k=%d, MinTTUVBlank = %f\n", __func__, k, v->MinTTUVBlank[k]); +#endif + + v->Tdmdl[k] = v->Tdmdl[k] + Tvstartup_margin; + if (mode_lib->vba.DynamicMetadataEnable[k] && mode_lib->vba.DynamicMetadataVMEnabled) + v->Tdmdl_vm[k] = v->Tdmdl_vm[k] + Tvstartup_margin; + + isInterlaceTiming = (mode_lib->vba.Interlace[k] && + !mode_lib->vba.ProgressiveToInterlaceUnitInOPP); + + v->MIN_DST_Y_NEXT_START[k] = ((isInterlaceTiming ? dml_floor((mode_lib->vba.VTotal[k] - + mode_lib->vba.VFrontPorch[k]) / 2.0, 1.0) : + mode_lib->vba.VTotal[k]) - mode_lib->vba.VFrontPorch[k]) + + dml_max(1.0, + dml_ceil(v->WritebackDelay[mode_lib->vba.VoltageLevel][k] + / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]), 1.0)) + + dml_floor(4.0 * v->TSetup[k] / (mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]), 1.0) / 4.0; + + v->VStartup[k] = (isInterlaceTiming ? (2 * v->MaxVStartupLines[k]) : v->MaxVStartupLines[k]); + + if (((v->VUpdateOffsetPix[k] + v->VUpdateWidthPix[k] + v->VReadyOffsetPix[k]) + / mode_lib->vba.HTotal[k]) <= (isInterlaceTiming ? dml_floor((mode_lib->vba.VTotal[k] + - mode_lib->vba.VActive[k] - mode_lib->vba.VFrontPorch[k] - v->VStartup[k]) / 2.0, 1.0) : + (int) (mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k] + - mode_lib->vba.VFrontPorch[k] - v->VStartup[k]))) { + v->VREADY_AT_OR_AFTER_VSYNC[k] = true; + } else { + v->VREADY_AT_OR_AFTER_VSYNC[k] = false; + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%d, VStartup = %d (max)\n", __func__, k, v->VStartup[k]); + dml_print("DML::%s: k=%d, VUpdateOffsetPix = %d\n", __func__, k, v->VUpdateOffsetPix[k]); + dml_print("DML::%s: k=%d, VUpdateWidthPix = %d\n", __func__, k, v->VUpdateWidthPix[k]); + dml_print("DML::%s: k=%d, VReadyOffsetPix = %d\n", __func__, k, v->VReadyOffsetPix[k]); + dml_print("DML::%s: k=%d, HTotal = %d\n", __func__, k, mode_lib->vba.HTotal[k]); + dml_print("DML::%s: k=%d, VTotal = %d\n", __func__, k, mode_lib->vba.VTotal[k]); + dml_print("DML::%s: k=%d, VActive = %d\n", __func__, k, mode_lib->vba.VActive[k]); + dml_print("DML::%s: k=%d, VFrontPorch = %d\n", __func__, k, mode_lib->vba.VFrontPorch[k]); + dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]); + dml_print("DML::%s: k=%d, TSetup = %f\n", __func__, k, v->TSetup[k]); + dml_print("DML::%s: k=%d, MIN_DST_Y_NEXT_START = %f\n", __func__, k, v->MIN_DST_Y_NEXT_START[k]); + dml_print("DML::%s: k=%d, VREADY_AT_OR_AFTER_VSYNC = %d\n", __func__, k, + v->VREADY_AT_OR_AFTER_VSYNC[k]); +#endif + } + + { + //Maximum Bandwidth Used + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { + if (mode_lib->vba.WritebackEnable[k] == true + && mode_lib->vba.WritebackPixelFormat[k] == dm_444_32) { + WRBandwidth = mode_lib->vba.WritebackDestinationWidth[k] + * mode_lib->vba.WritebackDestinationHeight[k] + / (mode_lib->vba.HTotal[k] * mode_lib->vba.WritebackSourceHeight[k] + / mode_lib->vba.PixelClock[k]) * 4; + } else if (mode_lib->vba.WritebackEnable[k] == true) { + WRBandwidth = mode_lib->vba.WritebackDestinationWidth[k] + * mode_lib->vba.WritebackDestinationHeight[k] + / (mode_lib->vba.HTotal[k] * mode_lib->vba.WritebackSourceHeight[k] + / mode_lib->vba.PixelClock[k]) * 8; + } + TotalWRBandwidth = TotalWRBandwidth + WRBandwidth; + } + + v->TotalDataReadBandwidth = 0; + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { + v->TotalDataReadBandwidth = v->TotalDataReadBandwidth + v->ReadBandwidthSurfaceLuma[k] + + v->ReadBandwidthSurfaceChroma[k]; +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%d, TotalDataReadBandwidth = %f\n", + __func__, k, v->TotalDataReadBandwidth); + dml_print("DML::%s: k=%d, ReadBandwidthSurfaceLuma = %f\n", + __func__, k, v->ReadBandwidthSurfaceLuma[k]); + dml_print("DML::%s: k=%d, ReadBandwidthSurfaceChroma = %f\n", + __func__, k, v->ReadBandwidthSurfaceChroma[k]); +#endif + } + } + + // Stutter Efficiency + dml32_CalculateStutterEfficiency(v->CompressedBufferSizeInkByte, + mode_lib->vba.UsesMALLForPStateChange, + v->UnboundedRequestEnabled, + mode_lib->vba.MetaFIFOSizeInKEntries, + mode_lib->vba.ZeroSizeBufferEntries, + mode_lib->vba.PixelChunkSizeInKByte, + mode_lib->vba.NumberOfActiveSurfaces, + mode_lib->vba.ROBBufferSizeInKByte, + v->TotalDataReadBandwidth, + mode_lib->vba.DCFCLK, + mode_lib->vba.ReturnBW, + v->CompbufReservedSpace64B, + v->CompbufReservedSpaceZs, + mode_lib->vba.SRExitTime, + mode_lib->vba.SRExitZ8Time, + mode_lib->vba.SynchronizeTimingsFinal, + mode_lib->vba.BlendingAndTiming, + v->Watermark.StutterEnterPlusExitWatermark, + v->Watermark.Z8StutterEnterPlusExitWatermark, + mode_lib->vba.ProgressiveToInterlaceUnitInOPP, + mode_lib->vba.Interlace, + v->MinTTUVBlank, mode_lib->vba.DPPPerPlane, + mode_lib->vba.DETBufferSizeY, + v->BytePerPixelY, + v->BytePerPixelDETY, + v->SwathWidthY, + mode_lib->vba.SwathHeightY, + mode_lib->vba.SwathHeightC, + mode_lib->vba.DCCRateLuma, + mode_lib->vba.DCCRateChroma, + mode_lib->vba.DCCFractionOfZeroSizeRequestsLuma, + mode_lib->vba.DCCFractionOfZeroSizeRequestsChroma, + mode_lib->vba.HTotal, mode_lib->vba.VTotal, + mode_lib->vba.PixelClock, + mode_lib->vba.VRatio, + mode_lib->vba.SourceRotation, + v->BlockHeight256BytesY, + v->BlockWidth256BytesY, + v->BlockHeight256BytesC, + v->BlockWidth256BytesC, + v->DCCYMaxUncompressedBlock, + v->DCCCMaxUncompressedBlock, + mode_lib->vba.VActive, + mode_lib->vba.DCCEnable, + mode_lib->vba.WritebackEnable, + v->ReadBandwidthSurfaceLuma, + v->ReadBandwidthSurfaceChroma, + v->meta_row_bw, + v->dpte_row_bw, + /* Output */ + &v->StutterEfficiencyNotIncludingVBlank, + &v->StutterEfficiency, + &v->NumberOfStutterBurstsPerFrame, + &v->Z8StutterEfficiencyNotIncludingVBlank, + &v->Z8StutterEfficiency, + &v->Z8NumberOfStutterBurstsPerFrame, + &v->StutterPeriod, + &v->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE); + +#ifdef __DML_VBA_ALLOW_DELTA__ + { + unsigned int dummy_integer[1]; + + // Calculate z8 stutter eff assuming 0 reserved space + dml32_CalculateStutterEfficiency(v->CompressedBufferSizeInkByte, + mode_lib->vba.UsesMALLForPStateChange, + v->UnboundedRequestEnabled, + mode_lib->vba.MetaFIFOSizeInKEntries, + mode_lib->vba.ZeroSizeBufferEntries, + mode_lib->vba.PixelChunkSizeInKByte, + mode_lib->vba.NumberOfActiveSurfaces, + mode_lib->vba.ROBBufferSizeInKByte, + v->TotalDataReadBandwidth, + mode_lib->vba.DCFCLK, + mode_lib->vba.ReturnBW, + 0, //CompbufReservedSpace64B, + 0, //CompbufReservedSpaceZs, + mode_lib->vba.SRExitTime, + mode_lib->vba.SRExitZ8Time, + mode_lib->vba.SynchronizeTimingsFinal, + mode_lib->vba.BlendingAndTiming, + v->Watermark.StutterEnterPlusExitWatermark, + v->Watermark.Z8StutterEnterPlusExitWatermark, + mode_lib->vba.ProgressiveToInterlaceUnitInOPP, + mode_lib->vba.Interlace, + v->MinTTUVBlank, + mode_lib->vba.DPPPerPlane, + mode_lib->vba.DETBufferSizeY, + v->BytePerPixelY, v->BytePerPixelDETY, + v->SwathWidthY, mode_lib->vba.SwathHeightY, + mode_lib->vba.SwathHeightC, + mode_lib->vba.DCCRateLuma, + mode_lib->vba.DCCRateChroma, + mode_lib->vba.DCCFractionOfZeroSizeRequestsLuma, + mode_lib->vba.DCCFractionOfZeroSizeRequestsChroma, + mode_lib->vba.HTotal, + mode_lib->vba.VTotal, + mode_lib->vba.PixelClock, + mode_lib->vba.VRatio, + mode_lib->vba.SourceRotation, + v->BlockHeight256BytesY, + v->BlockWidth256BytesY, + v->BlockHeight256BytesC, + v->BlockWidth256BytesC, + v->DCCYMaxUncompressedBlock, + v->DCCCMaxUncompressedBlock, + mode_lib->vba.VActive, + mode_lib->vba.DCCEnable, + mode_lib->vba.WritebackEnable, + v->ReadBandwidthSurfaceLuma, + v->ReadBandwidthSurfaceChroma, + v->meta_row_bw, v->dpte_row_bw, + + /* Output */ + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_single[0], + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_single[1], + &dummy_integer[0], + &v->Z8StutterEfficiencyNotIncludingVBlankBestCase, + &v->Z8StutterEfficiencyBestCase, + &v->Z8NumberOfStutterBurstsPerFrameBestCase, + &v->StutterPeriodBestCase, + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_boolean); + } +#else + v->Z8StutterEfficiencyNotIncludingVBlankBestCase = v->Z8StutterEfficiencyNotIncludingVBlank; + v->Z8StutterEfficiencyBestCase = v->Z8StutterEfficiency; + v->Z8NumberOfStutterBurstsPerFrameBestCase = v->Z8NumberOfStutterBurstsPerFrame; + v->StutterPeriodBestCase = v->StutterPeriod; +#endif + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: --- END ---\n", __func__); +#endif +} + +static void mode_support_configuration(struct vba_vars_st *v, + struct display_mode_lib *mode_lib) +{ + int i, j; + + for (i = v->soc.num_states - 1; i >= 0; i--) { + for (j = 0; j < 2; j++) { + if (mode_lib->vba.ScaleRatioAndTapsSupport == true + && mode_lib->vba.SourceFormatPixelAndScanSupport == true + && mode_lib->vba.ViewportSizeSupport[i][j] == true + && !mode_lib->vba.LinkRateDoesNotMatchDPVersion + && !mode_lib->vba.LinkRateForMultistreamNotIndicated + && !mode_lib->vba.BPPForMultistreamNotIndicated + && !mode_lib->vba.MultistreamWithHDMIOreDP + && !mode_lib->vba.ExceededMultistreamSlots[i] + && !mode_lib->vba.MSOOrODMSplitWithNonDPLink + && !mode_lib->vba.NotEnoughLanesForMSO + && mode_lib->vba.LinkCapacitySupport[i] == true && !mode_lib->vba.P2IWith420 + //&& !mode_lib->vba.DSCOnlyIfNecessaryWithBPP + && !mode_lib->vba.DSC422NativeNotSupported + && !mode_lib->vba.MPCCombineMethodIncompatible + && mode_lib->vba.ODMCombine2To1SupportCheckOK[i] == true + && mode_lib->vba.ODMCombine4To1SupportCheckOK[i] == true + && mode_lib->vba.NotEnoughDSCUnits[i] == false + && !mode_lib->vba.NotEnoughDSCSlices[i] + && !mode_lib->vba.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe + && !mode_lib->vba.InvalidCombinationOfMALLUseForPStateAndStaticScreen + && mode_lib->vba.DSCCLKRequiredMoreThanSupported[i] == false + && mode_lib->vba.PixelsPerLinePerDSCUnitSupport[i] + && mode_lib->vba.DTBCLKRequiredMoreThanSupported[i] == false + && !mode_lib->vba.InvalidCombinationOfMALLUseForPState + && !mode_lib->vba.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified + && mode_lib->vba.ROBSupport[i][j] == true + && mode_lib->vba.DISPCLK_DPPCLK_Support[i][j] == true + && mode_lib->vba.TotalAvailablePipesSupport[i][j] == true + && mode_lib->vba.NumberOfOTGSupport == true + && mode_lib->vba.NumberOfHDMIFRLSupport == true + && mode_lib->vba.EnoughWritebackUnits == true + && mode_lib->vba.WritebackLatencySupport == true + && mode_lib->vba.WritebackScaleRatioAndTapsSupport == true + && mode_lib->vba.CursorSupport == true && mode_lib->vba.PitchSupport == true + && mode_lib->vba.ViewportExceedsSurface == false + && mode_lib->vba.PrefetchSupported[i][j] == true + && mode_lib->vba.VActiveBandwithSupport[i][j] == true + && mode_lib->vba.DynamicMetadataSupported[i][j] == true + && mode_lib->vba.TotalVerticalActiveBandwidthSupport[i][j] == true + && mode_lib->vba.VRatioInPrefetchSupported[i][j] == true + && mode_lib->vba.PTEBufferSizeNotExceeded[i][j] == true + && mode_lib->vba.DCCMetaBufferSizeNotExceeded[i][j] == true + && mode_lib->vba.NonsupportedDSCInputBPC == false + && mode_lib->vba.NotEnoughDETSwathFillLatencyHidingPerState[i][j] == false + && !mode_lib->vba.ExceededMALLSize + && ((mode_lib->vba.HostVMEnable == false + && !mode_lib->vba.ImmediateFlipRequiredFinal) + || mode_lib->vba.ImmediateFlipSupportedForState[i][j]) + && (!mode_lib->vba.DRAMClockChangeRequirementFinal + || i == v->soc.num_states - 1 + || mode_lib->vba.DRAMClockChangeSupport[i][j] != dm_dram_clock_change_unsupported) + && (!mode_lib->vba.FCLKChangeRequirementFinal || i == v->soc.num_states - 1 + || mode_lib->vba.FCLKChangeSupport[i][j] != dm_fclock_change_unsupported) + && (!mode_lib->vba.USRRetrainingRequiredFinal + || mode_lib->vba.USRRetrainingSupport[i][j])) { + mode_lib->vba.ModeSupport[i][j] = true; + } else { + mode_lib->vba.ModeSupport[i][j] = false; + } + } + } +} + +void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib) +{ + struct vba_vars_st *v = &mode_lib->vba; + int i, j; + unsigned int k, m; + unsigned int MaximumMPCCombine; + unsigned int NumberOfNonCombinedSurfaceOfMaximumBandwidth; + unsigned int TotalSlots; + bool CompBufReservedSpaceNeedAdjustment; + bool CompBufReservedSpaceNeedAdjustmentSingleDPP; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: called\n", __func__); +#endif + + /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/ + + /*Scale Ratio, taps Support Check*/ + + mode_lib->vba.ScaleRatioAndTapsSupport = true; + for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) { + if (mode_lib->vba.ScalerEnabled[k] == false + && ((mode_lib->vba.SourcePixelFormat[k] != dm_444_64 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_32 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_16 + && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16 + && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8 + && mode_lib->vba.SourcePixelFormat[k] != dm_rgbe + && mode_lib->vba.SourcePixelFormat[k] != dm_rgbe_alpha) + || mode_lib->vba.HRatio[k] != 1.0 || mode_lib->vba.htaps[k] != 1.0 + || mode_lib->vba.VRatio[k] != 1.0 || mode_lib->vba.vtaps[k] != 1.0)) { + mode_lib->vba.ScaleRatioAndTapsSupport = false; + } else if (mode_lib->vba.vtaps[k] < 1.0 || mode_lib->vba.vtaps[k] > 8.0 || mode_lib->vba.htaps[k] < 1.0 + || mode_lib->vba.htaps[k] > 8.0 + || (mode_lib->vba.htaps[k] > 1.0 && (mode_lib->vba.htaps[k] % 2) == 1) + || mode_lib->vba.HRatio[k] > mode_lib->vba.MaxHSCLRatio + || mode_lib->vba.VRatio[k] > mode_lib->vba.MaxVSCLRatio + || mode_lib->vba.HRatio[k] > mode_lib->vba.htaps[k] + || mode_lib->vba.VRatio[k] > mode_lib->vba.vtaps[k] + || (mode_lib->vba.SourcePixelFormat[k] != dm_444_64 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_32 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_16 + && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16 + && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8 + && mode_lib->vba.SourcePixelFormat[k] != dm_rgbe + && (mode_lib->vba.VTAPsChroma[k] < 1 + || mode_lib->vba.VTAPsChroma[k] > 8 + || mode_lib->vba.HTAPsChroma[k] < 1 + || mode_lib->vba.HTAPsChroma[k] > 8 + || (mode_lib->vba.HTAPsChroma[k] > 1 + && mode_lib->vba.HTAPsChroma[k] % 2 + == 1) + || mode_lib->vba.HRatioChroma[k] + > mode_lib->vba.MaxHSCLRatio + || mode_lib->vba.VRatioChroma[k] + > mode_lib->vba.MaxVSCLRatio + || mode_lib->vba.HRatioChroma[k] + > mode_lib->vba.HTAPsChroma[k] + || mode_lib->vba.VRatioChroma[k] + > mode_lib->vba.VTAPsChroma[k]))) { + mode_lib->vba.ScaleRatioAndTapsSupport = false; + } + } + + /*Source Format, Pixel Format and Scan Support Check*/ + mode_lib->vba.SourceFormatPixelAndScanSupport = true; + for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) { + if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear + && (!(!IsVertical((enum dm_rotation_angle) mode_lib->vba.SourceScan[k])) + || mode_lib->vba.DCCEnable[k] == true)) { + mode_lib->vba.SourceFormatPixelAndScanSupport = false; + } + } + + for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) { + dml32_CalculateBytePerPixelAndBlockSizes( + mode_lib->vba.SourcePixelFormat[k], + mode_lib->vba.SurfaceTiling[k], + + /* Output */ + &mode_lib->vba.BytePerPixelY[k], + &mode_lib->vba.BytePerPixelC[k], + &mode_lib->vba.BytePerPixelInDETY[k], + &mode_lib->vba.BytePerPixelInDETC[k], + &mode_lib->vba.Read256BlockHeightY[k], + &mode_lib->vba.Read256BlockHeightC[k], + &mode_lib->vba.Read256BlockWidthY[k], + &mode_lib->vba.Read256BlockWidthC[k], + &mode_lib->vba.MacroTileHeightY[k], + &mode_lib->vba.MacroTileHeightC[k], + &mode_lib->vba.MacroTileWidthY[k], + &mode_lib->vba.MacroTileWidthC[k]); + } + + /*Bandwidth Support Check*/ + for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) { + if (!IsVertical(mode_lib->vba.SourceRotation[k])) { + v->SwathWidthYSingleDPP[k] = mode_lib->vba.ViewportWidth[k]; + v->SwathWidthCSingleDPP[k] = mode_lib->vba.ViewportWidthChroma[k]; + } else { + v->SwathWidthYSingleDPP[k] = mode_lib->vba.ViewportHeight[k]; + v->SwathWidthCSingleDPP[k] = mode_lib->vba.ViewportHeightChroma[k]; + } + } + for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) { + v->ReadBandwidthLuma[k] = v->SwathWidthYSingleDPP[k] * dml_ceil(v->BytePerPixelInDETY[k], 1.0) + / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k]; + v->ReadBandwidthChroma[k] = v->SwathWidthYSingleDPP[k] / 2 * dml_ceil(v->BytePerPixelInDETC[k], 2.0) + / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k] + / 2.0; + } + for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) { + if (mode_lib->vba.WritebackEnable[k] == true && mode_lib->vba.WritebackPixelFormat[k] == dm_444_64) { + v->WriteBandwidth[k] = mode_lib->vba.WritebackDestinationWidth[k] + * mode_lib->vba.WritebackDestinationHeight[k] + / (mode_lib->vba.WritebackSourceHeight[k] * mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]) * 8.0; + } else if (mode_lib->vba.WritebackEnable[k] == true) { + v->WriteBandwidth[k] = mode_lib->vba.WritebackDestinationWidth[k] + * mode_lib->vba.WritebackDestinationHeight[k] + / (mode_lib->vba.WritebackSourceHeight[k] * mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]) * 4.0; + } else { + v->WriteBandwidth[k] = 0.0; + } + } + + /*Writeback Latency support check*/ + + mode_lib->vba.WritebackLatencySupport = true; + for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) { + if (mode_lib->vba.WritebackEnable[k] == true + && (v->WriteBandwidth[k] + > mode_lib->vba.WritebackInterfaceBufferSize * 1024 + / mode_lib->vba.WritebackLatency)) { + mode_lib->vba.WritebackLatencySupport = false; + } + } + + /*Writeback Mode Support Check*/ + mode_lib->vba.EnoughWritebackUnits = true; + mode_lib->vba.TotalNumberOfActiveWriteback = 0; + for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) { + if (mode_lib->vba.WritebackEnable[k] == true) + mode_lib->vba.TotalNumberOfActiveWriteback = mode_lib->vba.TotalNumberOfActiveWriteback + 1; + } + + if (mode_lib->vba.TotalNumberOfActiveWriteback > mode_lib->vba.MaxNumWriteback) + mode_lib->vba.EnoughWritebackUnits = false; + + /*Writeback Scale Ratio and Taps Support Check*/ + mode_lib->vba.WritebackScaleRatioAndTapsSupport = true; + for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) { + if (mode_lib->vba.WritebackEnable[k] == true) { + if (mode_lib->vba.WritebackHRatio[k] > mode_lib->vba.WritebackMaxHSCLRatio + || mode_lib->vba.WritebackVRatio[k] > mode_lib->vba.WritebackMaxVSCLRatio + || mode_lib->vba.WritebackHRatio[k] < mode_lib->vba.WritebackMinHSCLRatio + || mode_lib->vba.WritebackVRatio[k] < mode_lib->vba.WritebackMinVSCLRatio + || mode_lib->vba.WritebackHTaps[k] > mode_lib->vba.WritebackMaxHSCLTaps + || mode_lib->vba.WritebackVTaps[k] > mode_lib->vba.WritebackMaxVSCLTaps + || mode_lib->vba.WritebackHRatio[k] > mode_lib->vba.WritebackHTaps[k] + || mode_lib->vba.WritebackVRatio[k] > mode_lib->vba.WritebackVTaps[k] + || (mode_lib->vba.WritebackHTaps[k] > 2.0 + && ((mode_lib->vba.WritebackHTaps[k] % 2) == 1))) { + mode_lib->vba.WritebackScaleRatioAndTapsSupport = false; + } + if (2.0 * mode_lib->vba.WritebackDestinationWidth[k] * (mode_lib->vba.WritebackVTaps[k] - 1) + * 57 > mode_lib->vba.WritebackLineBufferSize) { + mode_lib->vba.WritebackScaleRatioAndTapsSupport = false; + } + } + } + + for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) { + dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(mode_lib->vba.HRatio[k], mode_lib->vba.HRatioChroma[k], + mode_lib->vba.VRatio[k], mode_lib->vba.VRatioChroma[k], + mode_lib->vba.MaxDCHUBToPSCLThroughput, mode_lib->vba.MaxPSCLToLBThroughput, + mode_lib->vba.PixelClock[k], mode_lib->vba.SourcePixelFormat[k], + mode_lib->vba.htaps[k], mode_lib->vba.HTAPsChroma[k], mode_lib->vba.vtaps[k], + mode_lib->vba.VTAPsChroma[k], + /* Output */ + &mode_lib->vba.PSCL_FACTOR[k], &mode_lib->vba.PSCL_FACTOR_CHROMA[k], + &mode_lib->vba.MinDPPCLKUsingSingleDPP[k]); + } + + for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) { + + if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) { + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaximumSwathWidthSupportLuma = 8192; + } else if (!IsVertical(mode_lib->vba.SourceRotation[k]) && v->BytePerPixelC[k] > 0 + && mode_lib->vba.SourcePixelFormat[k] != dm_rgbe_alpha) { + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaximumSwathWidthSupportLuma = 7680; + } else if (IsVertical(mode_lib->vba.SourceRotation[k]) && v->BytePerPixelC[k] > 0 + && mode_lib->vba.SourcePixelFormat[k] != dm_rgbe_alpha) { + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaximumSwathWidthSupportLuma = 4320; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_rgbe_alpha) { + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaximumSwathWidthSupportLuma = 3840; + } else if (IsVertical(mode_lib->vba.SourceRotation[k]) && v->BytePerPixelY[k] == 8 && + mode_lib->vba.DCCEnable[k] == true) { + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaximumSwathWidthSupportLuma = 3072; + } else { + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaximumSwathWidthSupportLuma = 6144; + } + + if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8 || mode_lib->vba.SourcePixelFormat[k] == dm_420_10 + || mode_lib->vba.SourcePixelFormat[k] == dm_420_12) { + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaximumSwathWidthSupportChroma = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaximumSwathWidthSupportLuma / 2.0; + } else { + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaximumSwathWidthSupportChroma = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaximumSwathWidthSupportLuma; + } + v->MaximumSwathWidthInLineBufferLuma = mode_lib->vba.LineBufferSizeFinal + * dml_max(mode_lib->vba.HRatio[k], 1.0) / mode_lib->vba.LBBitPerPixel[k] + / (mode_lib->vba.vtaps[k] + dml_max(dml_ceil(mode_lib->vba.VRatio[k], 1.0) - 2, 0.0)); + if (v->BytePerPixelC[k] == 0.0) { + v->MaximumSwathWidthInLineBufferChroma = 0; + } else { + v->MaximumSwathWidthInLineBufferChroma = mode_lib->vba.LineBufferSizeFinal + * dml_max(mode_lib->vba.HRatioChroma[k], 1.0) / mode_lib->vba.LBBitPerPixel[k] + / (mode_lib->vba.VTAPsChroma[k] + + dml_max(dml_ceil(mode_lib->vba.VRatioChroma[k], 1.0) - 2, + 0.0)); + } + v->MaximumSwathWidthLuma[k] = dml_min(v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaximumSwathWidthSupportLuma, + v->MaximumSwathWidthInLineBufferLuma); + v->MaximumSwathWidthChroma[k] = dml_min(v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaximumSwathWidthSupportChroma, + v->MaximumSwathWidthInLineBufferChroma); + } + + dml32_CalculateSwathAndDETConfiguration( + mode_lib->vba.DETSizeOverride, + mode_lib->vba.UsesMALLForPStateChange, + mode_lib->vba.ConfigReturnBufferSizeInKByte, + mode_lib->vba.MaxTotalDETInKByte, + mode_lib->vba.MinCompressedBufferSizeInKByte, + 1, /* ForceSingleDPP */ + mode_lib->vba.NumberOfActiveSurfaces, + mode_lib->vba.nomDETInKByte, + mode_lib->vba.UseUnboundedRequesting, + mode_lib->vba.DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment, + mode_lib->vba.ip.pixel_chunk_size_kbytes, + mode_lib->vba.ip.rob_buffer_size_kbytes, + mode_lib->vba.CompressedBufferSegmentSizeInkByteFinal, + mode_lib->vba.Output, + mode_lib->vba.ReadBandwidthLuma, + mode_lib->vba.ReadBandwidthChroma, + mode_lib->vba.MaximumSwathWidthLuma, + mode_lib->vba.MaximumSwathWidthChroma, + mode_lib->vba.SourceRotation, + mode_lib->vba.ViewportStationary, + mode_lib->vba.SourcePixelFormat, + mode_lib->vba.SurfaceTiling, + mode_lib->vba.ViewportWidth, + mode_lib->vba.ViewportHeight, + mode_lib->vba.ViewportXStartY, + mode_lib->vba.ViewportYStartY, + mode_lib->vba.ViewportXStartC, + mode_lib->vba.ViewportYStartC, + mode_lib->vba.SurfaceWidthY, + mode_lib->vba.SurfaceWidthC, + mode_lib->vba.SurfaceHeightY, + mode_lib->vba.SurfaceHeightC, + mode_lib->vba.Read256BlockHeightY, + mode_lib->vba.Read256BlockHeightC, + mode_lib->vba.Read256BlockWidthY, + mode_lib->vba.Read256BlockWidthC, + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_odm_mode, + mode_lib->vba.BlendingAndTiming, + mode_lib->vba.BytePerPixelY, + mode_lib->vba.BytePerPixelC, + mode_lib->vba.BytePerPixelInDETY, + mode_lib->vba.BytePerPixelInDETC, + mode_lib->vba.HActive, + mode_lib->vba.HRatio, + mode_lib->vba.HRatioChroma, + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[0], /* Integer DPPPerSurface[] */ + + /* Output */ + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[1], /* Long swath_width_luma_ub[] */ + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[2], /* Long swath_width_chroma_ub[] */ + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_double_array[0], /* Long SwathWidth[] */ + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_double_array[1], /* Long SwathWidthChroma[] */ + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[3], /* Integer SwathHeightY[] */ + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[4], /* Integer SwathHeightC[] */ + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[5], /* Long DETBufferSizeInKByte[] */ + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[6], /* Long DETBufferSizeY[] */ + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[7], /* Long DETBufferSizeC[] */ + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_boolean_array[0][0], /* bool *UnboundedRequestEnabled */ + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[0][0], /* Long *CompressedBufferSizeInkByte */ + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[1][0], /* Long *CompBufReservedSpaceKBytes */ + &CompBufReservedSpaceNeedAdjustmentSingleDPP, + mode_lib->vba.SingleDPPViewportSizeSupportPerSurface,/* bool ViewportSizeSupportPerSurface[] */ + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_boolean_array[1][0]); /* bool *ViewportSizeSupport */ + + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MPCCombineMethodAsNeededForPStateChangeAndVoltage = false; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MPCCombineMethodAsPossible = false; + + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { + if (mode_lib->vba.MPCCombineUse[k] == dm_mpc_reduce_voltage_and_clocks) + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MPCCombineMethodAsNeededForPStateChangeAndVoltage = true; + if (mode_lib->vba.MPCCombineUse[k] == dm_mpc_always_when_possible) + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MPCCombineMethodAsPossible = true; + } + mode_lib->vba.MPCCombineMethodIncompatible = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MPCCombineMethodAsNeededForPStateChangeAndVoltage + && v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MPCCombineMethodAsPossible; + + for (i = 0; i < v->soc.num_states; i++) { + for (j = 0; j < 2; j++) { + mode_lib->vba.TotalNumberOfActiveDPP[i][j] = 0; + mode_lib->vba.TotalAvailablePipesSupport[i][j] = true; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeNoDSC = dm_odm_combine_mode_disabled; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeDSC = dm_odm_combine_mode_disabled; + + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { + dml32_CalculateODMMode( + mode_lib->vba.MaximumPixelsPerLinePerDSCUnit, + mode_lib->vba.HActive[k], + mode_lib->vba.OutputFormat[k], + mode_lib->vba.Output[k], + mode_lib->vba.ODMUse[k], + mode_lib->vba.MaxDispclk[i], + mode_lib->vba.MaxDispclk[v->soc.num_states - 1], + false, + mode_lib->vba.TotalNumberOfActiveDPP[i][j], + mode_lib->vba.MaxNumDPP, + mode_lib->vba.PixelClock[k], + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading, + mode_lib->vba.DISPCLKRampingMargin, + mode_lib->vba.DISPCLKDPPCLKVCOSpeed, + mode_lib->vba.NumberOfDSCSlices[k], + + /* Output */ + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalAvailablePipesSupportNoDSC, + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NumberOfDPPNoDSC, + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeNoDSC, + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.RequiredDISPCLKPerSurfaceNoDSC); + + dml32_CalculateODMMode( + mode_lib->vba.MaximumPixelsPerLinePerDSCUnit, + mode_lib->vba.HActive[k], + mode_lib->vba.OutputFormat[k], + mode_lib->vba.Output[k], + mode_lib->vba.ODMUse[k], + mode_lib->vba.MaxDispclk[i], + mode_lib->vba.MaxDispclk[v->soc.num_states - 1], + true, + mode_lib->vba.TotalNumberOfActiveDPP[i][j], + mode_lib->vba.MaxNumDPP, + mode_lib->vba.PixelClock[k], + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading, + mode_lib->vba.DISPCLKRampingMargin, + mode_lib->vba.DISPCLKDPPCLKVCOSpeed, + mode_lib->vba.NumberOfDSCSlices[k], + + /* Output */ + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalAvailablePipesSupportDSC, + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NumberOfDPPDSC, + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeDSC, + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.RequiredDISPCLKPerSurfaceDSC); + + dml32_CalculateOutputLink( + mode_lib->vba.PHYCLKPerState[i], + mode_lib->vba.PHYCLKD18PerState[i], + mode_lib->vba.PHYCLKD32PerState[i], + mode_lib->vba.Downspreading, + (mode_lib->vba.BlendingAndTiming[k] == k), + mode_lib->vba.Output[k], + mode_lib->vba.OutputFormat[k], + mode_lib->vba.HTotal[k], + mode_lib->vba.HActive[k], + mode_lib->vba.PixelClockBackEnd[k], + mode_lib->vba.ForcedOutputLinkBPP[k], + mode_lib->vba.DSCInputBitPerComponent[k], + mode_lib->vba.NumberOfDSCSlices[k], + mode_lib->vba.AudioSampleRate[k], + mode_lib->vba.AudioSampleLayout[k], + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeNoDSC, + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeDSC, + mode_lib->vba.DSCEnable[k], + mode_lib->vba.OutputLinkDPLanes[k], + mode_lib->vba.OutputLinkDPRate[k], + + /* Output */ + &mode_lib->vba.RequiresDSC[i][k], + &mode_lib->vba.RequiresFEC[i][k], + &mode_lib->vba.OutputBppPerState[i][k], + &mode_lib->vba.OutputTypePerState[i][k], + &mode_lib->vba.OutputRatePerState[i][k], + &mode_lib->vba.RequiredSlots[i][k]); + + if (mode_lib->vba.RequiresDSC[i][k] == false) { + mode_lib->vba.ODMCombineEnablePerState[i][k] = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeNoDSC; + mode_lib->vba.RequiredDISPCLKPerSurface[i][j][k] = + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.RequiredDISPCLKPerSurfaceNoDSC; + if (!v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalAvailablePipesSupportNoDSC) + mode_lib->vba.TotalAvailablePipesSupport[i][j] = false; + mode_lib->vba.TotalNumberOfActiveDPP[i][j] = + mode_lib->vba.TotalNumberOfActiveDPP[i][j] + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NumberOfDPPNoDSC; + } else { + mode_lib->vba.ODMCombineEnablePerState[i][k] = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeDSC; + mode_lib->vba.RequiredDISPCLKPerSurface[i][j][k] = + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.RequiredDISPCLKPerSurfaceDSC; + if (!v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalAvailablePipesSupportDSC) + mode_lib->vba.TotalAvailablePipesSupport[i][j] = false; + mode_lib->vba.TotalNumberOfActiveDPP[i][j] = + mode_lib->vba.TotalNumberOfActiveDPP[i][j] + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NumberOfDPPDSC; + } + } + + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { + if (mode_lib->vba.ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) { + mode_lib->vba.MPCCombine[i][j][k] = false; + mode_lib->vba.NoOfDPP[i][j][k] = 4; + } else if (mode_lib->vba.ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { + mode_lib->vba.MPCCombine[i][j][k] = false; + mode_lib->vba.NoOfDPP[i][j][k] = 2; + } else if (mode_lib->vba.MPCCombineUse[k] == dm_mpc_never) { + mode_lib->vba.MPCCombine[i][j][k] = false; + mode_lib->vba.NoOfDPP[i][j][k] = 1; + } else if (dml32_RoundToDFSGranularity( + mode_lib->vba.MinDPPCLKUsingSingleDPP[k] + * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading + / 100), 1, + mode_lib->vba.DISPCLKDPPCLKVCOSpeed) <= mode_lib->vba.MaxDppclk[i] && + mode_lib->vba.SingleDPPViewportSizeSupportPerSurface[k] == true) { + mode_lib->vba.MPCCombine[i][j][k] = false; + mode_lib->vba.NoOfDPP[i][j][k] = 1; + } else if (mode_lib->vba.TotalNumberOfActiveDPP[i][j] < mode_lib->vba.MaxNumDPP) { + mode_lib->vba.MPCCombine[i][j][k] = true; + mode_lib->vba.NoOfDPP[i][j][k] = 2; + mode_lib->vba.TotalNumberOfActiveDPP[i][j] = + mode_lib->vba.TotalNumberOfActiveDPP[i][j] + 1; + } else { + mode_lib->vba.MPCCombine[i][j][k] = false; + mode_lib->vba.NoOfDPP[i][j][k] = 1; + mode_lib->vba.TotalAvailablePipesSupport[i][j] = false; + } + } + + mode_lib->vba.TotalNumberOfSingleDPPSurfaces[i][j] = 0; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NoChroma = true; + + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { + if (mode_lib->vba.NoOfDPP[i][j][k] == 1) + mode_lib->vba.TotalNumberOfSingleDPPSurfaces[i][j] = + mode_lib->vba.TotalNumberOfSingleDPPSurfaces[i][j] + 1; + if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8 + || mode_lib->vba.SourcePixelFormat[k] == dm_420_10 + || mode_lib->vba.SourcePixelFormat[k] == dm_420_12 + || mode_lib->vba.SourcePixelFormat[k] == dm_rgbe_alpha) { + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NoChroma = false; + } + } + + // if TotalNumberOfActiveDPP is > 1, then there should be no unbounded req mode (hw limitation), the comp buf reserved adjustment is not needed regardless + // if TotalNumberOfActiveDPP is == 1, then will use the SingleDPP version of unbounded_req for the decision + CompBufReservedSpaceNeedAdjustment = (mode_lib->vba.TotalNumberOfActiveDPP[i][j] > 1) ? 0 : CompBufReservedSpaceNeedAdjustmentSingleDPP; + + + + if (j == 1 && !dml32_UnboundedRequest(mode_lib->vba.UseUnboundedRequesting, + mode_lib->vba.TotalNumberOfActiveDPP[i][j], v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NoChroma, + mode_lib->vba.Output[0], + mode_lib->vba.SurfaceTiling[0], + CompBufReservedSpaceNeedAdjustment, + mode_lib->vba.DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)) { + while (!(mode_lib->vba.TotalNumberOfActiveDPP[i][j] >= mode_lib->vba.MaxNumDPP + || mode_lib->vba.TotalNumberOfSingleDPPSurfaces[i][j] == 0)) { + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.BWOfNonCombinedSurfaceOfMaximumBandwidth = 0; + NumberOfNonCombinedSurfaceOfMaximumBandwidth = 0; + + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { + if (mode_lib->vba.MPCCombineUse[k] + != dm_mpc_never && + mode_lib->vba.MPCCombineUse[k] != dm_mpc_reduce_voltage && + mode_lib->vba.ReadBandwidthLuma[k] + + mode_lib->vba.ReadBandwidthChroma[k] > + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.BWOfNonCombinedSurfaceOfMaximumBandwidth && + (mode_lib->vba.ODMCombineEnablePerState[i][k] != + dm_odm_combine_mode_2to1 && + mode_lib->vba.ODMCombineEnablePerState[i][k] != + dm_odm_combine_mode_4to1) && + mode_lib->vba.MPCCombine[i][j][k] == false) { + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.BWOfNonCombinedSurfaceOfMaximumBandwidth = + mode_lib->vba.ReadBandwidthLuma[k] + + mode_lib->vba.ReadBandwidthChroma[k]; + NumberOfNonCombinedSurfaceOfMaximumBandwidth = k; + } + } + mode_lib->vba.MPCCombine[i][j][NumberOfNonCombinedSurfaceOfMaximumBandwidth] = + true; + mode_lib->vba.NoOfDPP[i][j][NumberOfNonCombinedSurfaceOfMaximumBandwidth] = 2; + mode_lib->vba.TotalNumberOfActiveDPP[i][j] = + mode_lib->vba.TotalNumberOfActiveDPP[i][j] + 1; + mode_lib->vba.TotalNumberOfSingleDPPSurfaces[i][j] = + mode_lib->vba.TotalNumberOfSingleDPPSurfaces[i][j] - 1; + } + } + + //DISPCLK/DPPCLK + mode_lib->vba.WritebackRequiredDISPCLK = 0; + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { + if (mode_lib->vba.WritebackEnable[k]) { + mode_lib->vba.WritebackRequiredDISPCLK = dml_max( + mode_lib->vba.WritebackRequiredDISPCLK, + dml32_CalculateWriteBackDISPCLK( + mode_lib->vba.WritebackPixelFormat[k], + mode_lib->vba.PixelClock[k], + mode_lib->vba.WritebackHRatio[k], + mode_lib->vba.WritebackVRatio[k], + mode_lib->vba.WritebackHTaps[k], + mode_lib->vba.WritebackVTaps[k], + mode_lib->vba.WritebackSourceWidth[k], + mode_lib->vba.WritebackDestinationWidth[k], + mode_lib->vba.HTotal[k], + mode_lib->vba.WritebackLineBufferSize, + mode_lib->vba.DISPCLKDPPCLKVCOSpeed)); + } + } + + mode_lib->vba.RequiredDISPCLK[i][j] = mode_lib->vba.WritebackRequiredDISPCLK; + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { + mode_lib->vba.RequiredDISPCLK[i][j] = dml_max(mode_lib->vba.RequiredDISPCLK[i][j], + mode_lib->vba.RequiredDISPCLKPerSurface[i][j][k]); + } + + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) + mode_lib->vba.NoOfDPPThisState[k] = mode_lib->vba.NoOfDPP[i][j][k]; + + dml32_CalculateDPPCLK(mode_lib->vba.NumberOfActiveSurfaces, + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading, + mode_lib->vba.DISPCLKDPPCLKVCOSpeed, mode_lib->vba.MinDPPCLKUsingSingleDPP, + mode_lib->vba.NoOfDPPThisState, + /* Output */ + &mode_lib->vba.GlobalDPPCLK, mode_lib->vba.RequiredDPPCLKThisState); + + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) + mode_lib->vba.RequiredDPPCLK[i][j][k] = mode_lib->vba.RequiredDPPCLKThisState[k]; + + mode_lib->vba.DISPCLK_DPPCLK_Support[i][j] = !((mode_lib->vba.RequiredDISPCLK[i][j] + > mode_lib->vba.MaxDispclk[i]) + || (mode_lib->vba.GlobalDPPCLK > mode_lib->vba.MaxDppclk[i])); + + if (mode_lib->vba.TotalNumberOfActiveDPP[i][j] > mode_lib->vba.MaxNumDPP) + mode_lib->vba.TotalAvailablePipesSupport[i][j] = false; + } // j + } // i (VOLTAGE_STATE) + + /* Total Available OTG, HDMIFRL, DP Support Check */ + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveOTG = 0; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveHDMIFRL = 0; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0 = 0; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0Outputs = 0; + + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { + if (mode_lib->vba.BlendingAndTiming[k] == k) { + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveOTG = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveOTG + 1; + if (mode_lib->vba.Output[k] == dm_dp2p0) { + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0 = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0 + 1; + if (mode_lib->vba.OutputMultistreamId[k] + == k || mode_lib->vba.OutputMultistreamEn[k] == false) { + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0Outputs = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0Outputs + 1; + } + } + } + } + + mode_lib->vba.NumberOfOTGSupport = (v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveOTG <= mode_lib->vba.MaxNumOTG); + mode_lib->vba.NumberOfHDMIFRLSupport = (v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveHDMIFRL <= mode_lib->vba.MaxNumHDMIFRLOutputs); + mode_lib->vba.NumberOfDP2p0Support = (v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0 <= mode_lib->vba.MaxNumDP2p0Streams + && v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0Outputs <= mode_lib->vba.MaxNumDP2p0Outputs); + + /* Display IO and DSC Support Check */ + mode_lib->vba.NonsupportedDSCInputBPC = false; + for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) { + if (!(mode_lib->vba.DSCInputBitPerComponent[k] == 12.0 + || mode_lib->vba.DSCInputBitPerComponent[k] == 10.0 + || mode_lib->vba.DSCInputBitPerComponent[k] == 8.0) + || mode_lib->vba.DSCInputBitPerComponent[k] > mode_lib->vba.MaximumDSCBitsPerComponent) { + mode_lib->vba.NonsupportedDSCInputBPC = true; + } + } + + for (i = 0; i < v->soc.num_states; ++i) { + mode_lib->vba.ExceededMultistreamSlots[i] = false; + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { + if (mode_lib->vba.OutputMultistreamEn[k] == true && mode_lib->vba.OutputMultistreamId[k] == k) { + TotalSlots = mode_lib->vba.RequiredSlots[i][k]; + for (j = 0; j < mode_lib->vba.NumberOfActiveSurfaces; ++j) { + if (mode_lib->vba.OutputMultistreamId[j] == k) + TotalSlots = TotalSlots + mode_lib->vba.RequiredSlots[i][j]; + } + if (mode_lib->vba.Output[k] == dm_dp && TotalSlots > 63) + mode_lib->vba.ExceededMultistreamSlots[i] = true; + if (mode_lib->vba.Output[k] == dm_dp2p0 && TotalSlots > 64) + mode_lib->vba.ExceededMultistreamSlots[i] = true; + } + } + mode_lib->vba.LinkCapacitySupport[i] = true; + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { + if (mode_lib->vba.BlendingAndTiming[k] == k + && (mode_lib->vba.Output[k] == dm_dp || mode_lib->vba.Output[k] == dm_dp2p0 + || mode_lib->vba.Output[k] == dm_edp + || mode_lib->vba.Output[k] == dm_hdmi) + && mode_lib->vba.OutputBppPerState[i][k] == 0) { + mode_lib->vba.LinkCapacitySupport[i] = false; + } + } + } + + mode_lib->vba.P2IWith420 = false; + mode_lib->vba.DSCOnlyIfNecessaryWithBPP = false; + mode_lib->vba.DSC422NativeNotSupported = false; + mode_lib->vba.LinkRateDoesNotMatchDPVersion = false; + mode_lib->vba.LinkRateForMultistreamNotIndicated = false; + mode_lib->vba.BPPForMultistreamNotIndicated = false; + mode_lib->vba.MultistreamWithHDMIOreDP = false; + mode_lib->vba.MSOOrODMSplitWithNonDPLink = false; + mode_lib->vba.NotEnoughLanesForMSO = false; + + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { + if (mode_lib->vba.BlendingAndTiming[k] == k + && (mode_lib->vba.Output[k] == dm_dp || mode_lib->vba.Output[k] == dm_dp2p0 + || mode_lib->vba.Output[k] == dm_edp + || mode_lib->vba.Output[k] == dm_hdmi)) { + if (mode_lib->vba.OutputFormat[k] + == dm_420 && mode_lib->vba.Interlace[k] == 1 && + mode_lib->vba.ProgressiveToInterlaceUnitInOPP == true) + mode_lib->vba.P2IWith420 = true; + + if (mode_lib->vba.DSCEnable[k] && mode_lib->vba.ForcedOutputLinkBPP[k] != 0) + mode_lib->vba.DSCOnlyIfNecessaryWithBPP = true; + if ((mode_lib->vba.DSCEnable[k] || mode_lib->vba.DSCEnable[k]) + && mode_lib->vba.OutputFormat[k] == dm_n422 + && !mode_lib->vba.DSC422NativeSupport) + mode_lib->vba.DSC422NativeNotSupported = true; + + if (((mode_lib->vba.OutputLinkDPRate[k] == dm_dp_rate_hbr + || mode_lib->vba.OutputLinkDPRate[k] == dm_dp_rate_hbr2 + || mode_lib->vba.OutputLinkDPRate[k] == dm_dp_rate_hbr3) + && mode_lib->vba.Output[k] != dm_dp && mode_lib->vba.Output[k] != dm_edp) + || ((mode_lib->vba.OutputLinkDPRate[k] == dm_dp_rate_uhbr10 + || mode_lib->vba.OutputLinkDPRate[k] == dm_dp_rate_uhbr13p5 + || mode_lib->vba.OutputLinkDPRate[k] == dm_dp_rate_uhbr20) + && mode_lib->vba.Output[k] != dm_dp2p0)) + mode_lib->vba.LinkRateDoesNotMatchDPVersion = true; + + if (mode_lib->vba.OutputMultistreamEn[k] == true) { + if (mode_lib->vba.OutputMultistreamId[k] == k + && mode_lib->vba.OutputLinkDPRate[k] == dm_dp_rate_na) + mode_lib->vba.LinkRateForMultistreamNotIndicated = true; + if (mode_lib->vba.OutputMultistreamId[k] == k && mode_lib->vba.ForcedOutputLinkBPP[k] == 0) + mode_lib->vba.BPPForMultistreamNotIndicated = true; + for (j = 0; j < mode_lib->vba.NumberOfActiveSurfaces; ++j) { + if (mode_lib->vba.OutputMultistreamId[k] == j + && mode_lib->vba.ForcedOutputLinkBPP[k] == 0) + mode_lib->vba.BPPForMultistreamNotIndicated = true; + } + } + + if ((mode_lib->vba.Output[k] == dm_edp || mode_lib->vba.Output[k] == dm_hdmi)) { + if (mode_lib->vba.OutputMultistreamEn[k] == true && mode_lib->vba.OutputMultistreamId[k] == k) + mode_lib->vba.MultistreamWithHDMIOreDP = true; + for (j = 0; j < mode_lib->vba.NumberOfActiveSurfaces; ++j) { + if (mode_lib->vba.OutputMultistreamEn[k] == true && mode_lib->vba.OutputMultistreamId[k] == j) + mode_lib->vba.MultistreamWithHDMIOreDP = true; + } + } + + if (mode_lib->vba.Output[k] != dm_dp + && (mode_lib->vba.ODMUse[k] == dm_odm_split_policy_1to2 + || mode_lib->vba.ODMUse[k] == dm_odm_mso_policy_1to2 + || mode_lib->vba.ODMUse[k] == dm_odm_mso_policy_1to4)) + mode_lib->vba.MSOOrODMSplitWithNonDPLink = true; + + if ((mode_lib->vba.ODMUse[k] == dm_odm_mso_policy_1to2 + && mode_lib->vba.OutputLinkDPLanes[k] < 2) + || (mode_lib->vba.ODMUse[k] == dm_odm_mso_policy_1to4 + && mode_lib->vba.OutputLinkDPLanes[k] < 4)) + mode_lib->vba.NotEnoughLanesForMSO = true; + } + } + + for (i = 0; i < v->soc.num_states; ++i) { + mode_lib->vba.DTBCLKRequiredMoreThanSupported[i] = false; + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { + if (mode_lib->vba.BlendingAndTiming[k] == k + && dml32_RequiredDTBCLK(mode_lib->vba.RequiresDSC[i][k], + mode_lib->vba.PixelClockBackEnd[k], + mode_lib->vba.OutputFormat[k], + mode_lib->vba.OutputBppPerState[i][k], + mode_lib->vba.NumberOfDSCSlices[k], mode_lib->vba.HTotal[k], + mode_lib->vba.HActive[k], mode_lib->vba.AudioSampleRate[k], + mode_lib->vba.AudioSampleLayout[k]) + > mode_lib->vba.DTBCLKPerState[i]) { + mode_lib->vba.DTBCLKRequiredMoreThanSupported[i] = true; + } + } + } + + for (i = 0; i < v->soc.num_states; ++i) { + mode_lib->vba.ODMCombine2To1SupportCheckOK[i] = true; + mode_lib->vba.ODMCombine4To1SupportCheckOK[i] = true; + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { + if (mode_lib->vba.BlendingAndTiming[k] == k + && mode_lib->vba.ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1 + && mode_lib->vba.Output[k] == dm_hdmi) { + mode_lib->vba.ODMCombine2To1SupportCheckOK[i] = false; + } + if (mode_lib->vba.BlendingAndTiming[k] == k + && mode_lib->vba.ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1 + && (mode_lib->vba.Output[k] == dm_dp || mode_lib->vba.Output[k] == dm_edp + || mode_lib->vba.Output[k] == dm_hdmi)) { + mode_lib->vba.ODMCombine4To1SupportCheckOK[i] = false; + } + } + } + + for (i = 0; i < v->soc.num_states; i++) { + mode_lib->vba.DSCCLKRequiredMoreThanSupported[i] = false; + for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) { + if (mode_lib->vba.BlendingAndTiming[k] == k) { + if (mode_lib->vba.Output[k] == dm_dp || mode_lib->vba.Output[k] == dm_dp2p0 + || mode_lib->vba.Output[k] == dm_edp) { + if (mode_lib->vba.OutputFormat[k] == dm_420) { + mode_lib->vba.DSCFormatFactor = 2; + } else if (mode_lib->vba.OutputFormat[k] == dm_444) { + mode_lib->vba.DSCFormatFactor = 1; + } else if (mode_lib->vba.OutputFormat[k] == dm_n422) { + mode_lib->vba.DSCFormatFactor = 2; + } else { + mode_lib->vba.DSCFormatFactor = 1; + } + if (mode_lib->vba.RequiresDSC[i][k] == true) { + if (mode_lib->vba.ODMCombineEnablePerState[i][k] + == dm_odm_combine_mode_4to1) { + if (mode_lib->vba.PixelClockBackEnd[k] / 12.0 / mode_lib->vba.DSCFormatFactor > (1.0 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * mode_lib->vba.MaxDSCCLK[i]) + mode_lib->vba.DSCCLKRequiredMoreThanSupported[i] = true; + } else if (mode_lib->vba.ODMCombineEnablePerState[i][k] + == dm_odm_combine_mode_2to1) { + if (mode_lib->vba.PixelClockBackEnd[k] / 6.0 / mode_lib->vba.DSCFormatFactor > (1.0 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * mode_lib->vba.MaxDSCCLK[i]) + mode_lib->vba.DSCCLKRequiredMoreThanSupported[i] = true; + } else { + if (mode_lib->vba.PixelClockBackEnd[k] / 3.0 / mode_lib->vba.DSCFormatFactor > (1.0 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * mode_lib->vba.MaxDSCCLK[i]) + mode_lib->vba.DSCCLKRequiredMoreThanSupported[i] = true; + } + } + } + } + } + } + + /* Check DSC Unit and Slices Support */ + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired = 0; + + for (i = 0; i < v->soc.num_states; ++i) { + mode_lib->vba.NotEnoughDSCUnits[i] = false; + mode_lib->vba.NotEnoughDSCSlices[i] = false; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired = 0; + mode_lib->vba.PixelsPerLinePerDSCUnitSupport[i] = true; + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { + if (mode_lib->vba.RequiresDSC[i][k] == true) { + if (mode_lib->vba.ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) { + if (mode_lib->vba.HActive[k] + > 4 * mode_lib->vba.MaximumPixelsPerLinePerDSCUnit) + mode_lib->vba.PixelsPerLinePerDSCUnitSupport[i] = false; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired + 4; + if (mode_lib->vba.NumberOfDSCSlices[k] > 16) + mode_lib->vba.NotEnoughDSCSlices[i] = true; + } else if (mode_lib->vba.ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { + if (mode_lib->vba.HActive[k] + > 2 * mode_lib->vba.MaximumPixelsPerLinePerDSCUnit) + mode_lib->vba.PixelsPerLinePerDSCUnitSupport[i] = false; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired + 2; + if (mode_lib->vba.NumberOfDSCSlices[k] > 8) + mode_lib->vba.NotEnoughDSCSlices[i] = true; + } else { + if (mode_lib->vba.HActive[k] > mode_lib->vba.MaximumPixelsPerLinePerDSCUnit) + mode_lib->vba.PixelsPerLinePerDSCUnitSupport[i] = false; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired + 1; + if (mode_lib->vba.NumberOfDSCSlices[k] > 4) + mode_lib->vba.NotEnoughDSCSlices[i] = true; + } + } + } + if (v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired > mode_lib->vba.NumberOfDSC) + mode_lib->vba.NotEnoughDSCUnits[i] = true; + } + + /*DSC Delay per state*/ + for (i = 0; i < v->soc.num_states; ++i) { + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { + mode_lib->vba.DSCDelayPerState[i][k] = dml32_DSCDelayRequirement( + mode_lib->vba.RequiresDSC[i][k], mode_lib->vba.ODMCombineEnablePerState[i][k], + mode_lib->vba.DSCInputBitPerComponent[k], + mode_lib->vba.OutputBppPerState[i][k], mode_lib->vba.HActive[k], + mode_lib->vba.HTotal[k], mode_lib->vba.NumberOfDSCSlices[k], + mode_lib->vba.OutputFormat[k], mode_lib->vba.Output[k], + mode_lib->vba.PixelClock[k], mode_lib->vba.PixelClockBackEnd[k], + mode_lib->vba.ip.dsc_delay_factor_wa); + } + + for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) { + for (m = 0; m <= mode_lib->vba.NumberOfActiveSurfaces - 1; m++) { + for (j = 0; j <= mode_lib->vba.NumberOfActiveSurfaces - 1; j++) { + if (mode_lib->vba.BlendingAndTiming[k] == m && + mode_lib->vba.RequiresDSC[i][m] == true) { + mode_lib->vba.DSCDelayPerState[i][k] = + mode_lib->vba.DSCDelayPerState[i][m]; + } + } + } + } + } + + //Calculate Swath, DET Configuration, DCFCLKDeepSleep + // + for (i = 0; i < (int) v->soc.num_states; ++i) { + for (j = 0; j <= 1; ++j) { + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { + mode_lib->vba.RequiredDPPCLKThisState[k] = mode_lib->vba.RequiredDPPCLK[i][j][k]; + mode_lib->vba.NoOfDPPThisState[k] = mode_lib->vba.NoOfDPP[i][j][k]; + mode_lib->vba.ODMCombineEnableThisState[k] = + mode_lib->vba.ODMCombineEnablePerState[i][k]; + } + + dml32_CalculateSwathAndDETConfiguration( + mode_lib->vba.DETSizeOverride, + mode_lib->vba.UsesMALLForPStateChange, + mode_lib->vba.ConfigReturnBufferSizeInKByte, + mode_lib->vba.MaxTotalDETInKByte, + mode_lib->vba.MinCompressedBufferSizeInKByte, + false, /* ForceSingleDPP */ + mode_lib->vba.NumberOfActiveSurfaces, + mode_lib->vba.nomDETInKByte, + mode_lib->vba.UseUnboundedRequesting, + mode_lib->vba.DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment, + mode_lib->vba.ip.pixel_chunk_size_kbytes, + mode_lib->vba.ip.rob_buffer_size_kbytes, + mode_lib->vba.CompressedBufferSegmentSizeInkByteFinal, + mode_lib->vba.Output, + mode_lib->vba.ReadBandwidthLuma, + mode_lib->vba.ReadBandwidthChroma, + mode_lib->vba.MaximumSwathWidthLuma, + mode_lib->vba.MaximumSwathWidthChroma, + mode_lib->vba.SourceRotation, + mode_lib->vba.ViewportStationary, + mode_lib->vba.SourcePixelFormat, + mode_lib->vba.SurfaceTiling, + mode_lib->vba.ViewportWidth, + mode_lib->vba.ViewportHeight, + mode_lib->vba.ViewportXStartY, + mode_lib->vba.ViewportYStartY, + mode_lib->vba.ViewportXStartC, + mode_lib->vba.ViewportYStartC, + mode_lib->vba.SurfaceWidthY, + mode_lib->vba.SurfaceWidthC, + mode_lib->vba.SurfaceHeightY, + mode_lib->vba.SurfaceHeightC, + mode_lib->vba.Read256BlockHeightY, + mode_lib->vba.Read256BlockHeightC, + mode_lib->vba.Read256BlockWidthY, + mode_lib->vba.Read256BlockWidthC, + mode_lib->vba.ODMCombineEnableThisState, + mode_lib->vba.BlendingAndTiming, + mode_lib->vba.BytePerPixelY, + mode_lib->vba.BytePerPixelC, + mode_lib->vba.BytePerPixelInDETY, + mode_lib->vba.BytePerPixelInDETC, + mode_lib->vba.HActive, + mode_lib->vba.HRatio, + mode_lib->vba.HRatioChroma, + mode_lib->vba.NoOfDPPThisState, + /* Output */ + mode_lib->vba.swath_width_luma_ub_this_state, + mode_lib->vba.swath_width_chroma_ub_this_state, + mode_lib->vba.SwathWidthYThisState, + mode_lib->vba.SwathWidthCThisState, + mode_lib->vba.SwathHeightYThisState, + mode_lib->vba.SwathHeightCThisState, + mode_lib->vba.DETBufferSizeInKByteThisState, + mode_lib->vba.DETBufferSizeYThisState, + mode_lib->vba.DETBufferSizeCThisState, + &mode_lib->vba.UnboundedRequestEnabledThisState, + &mode_lib->vba.CompressedBufferSizeInkByteThisState, + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer[0], /* Long CompBufReservedSpaceKBytes */ + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_boolean[0], /* bool CompBufReservedSpaceNeedAdjustment */ + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_boolean_array[0], + &mode_lib->vba.ViewportSizeSupport[i][j]); + + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { + mode_lib->vba.swath_width_luma_ub_all_states[i][j][k] = + mode_lib->vba.swath_width_luma_ub_this_state[k]; + mode_lib->vba.swath_width_chroma_ub_all_states[i][j][k] = + mode_lib->vba.swath_width_chroma_ub_this_state[k]; + mode_lib->vba.SwathWidthYAllStates[i][j][k] = mode_lib->vba.SwathWidthYThisState[k]; + mode_lib->vba.SwathWidthCAllStates[i][j][k] = mode_lib->vba.SwathWidthCThisState[k]; + mode_lib->vba.SwathHeightYAllStates[i][j][k] = mode_lib->vba.SwathHeightYThisState[k]; + mode_lib->vba.SwathHeightCAllStates[i][j][k] = mode_lib->vba.SwathHeightCThisState[k]; + mode_lib->vba.UnboundedRequestEnabledAllStates[i][j] = + mode_lib->vba.UnboundedRequestEnabledThisState; + mode_lib->vba.CompressedBufferSizeInkByteAllStates[i][j] = + mode_lib->vba.CompressedBufferSizeInkByteThisState; + mode_lib->vba.DETBufferSizeInKByteAllStates[i][j][k] = + mode_lib->vba.DETBufferSizeInKByteThisState[k]; + mode_lib->vba.DETBufferSizeYAllStates[i][j][k] = + mode_lib->vba.DETBufferSizeYThisState[k]; + mode_lib->vba.DETBufferSizeCAllStates[i][j][k] = + mode_lib->vba.DETBufferSizeCThisState[k]; + } + } + } + + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { + mode_lib->vba.cursor_bw[k] = mode_lib->vba.NumberOfCursors[k] * mode_lib->vba.CursorWidth[k][0] + * mode_lib->vba.CursorBPP[k][0] / 8.0 + / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k]; + } + + dml32_CalculateSurfaceSizeInMall( + mode_lib->vba.NumberOfActiveSurfaces, + mode_lib->vba.MALLAllocatedForDCNFinal, + mode_lib->vba.UseMALLForStaticScreen, + mode_lib->vba.DCCEnable, + mode_lib->vba.ViewportStationary, + mode_lib->vba.ViewportXStartY, + mode_lib->vba.ViewportYStartY, + mode_lib->vba.ViewportXStartC, + mode_lib->vba.ViewportYStartC, + mode_lib->vba.ViewportWidth, + mode_lib->vba.ViewportHeight, + mode_lib->vba.BytePerPixelY, + mode_lib->vba.ViewportWidthChroma, + mode_lib->vba.ViewportHeightChroma, + mode_lib->vba.BytePerPixelC, + mode_lib->vba.SurfaceWidthY, + mode_lib->vba.SurfaceWidthC, + mode_lib->vba.SurfaceHeightY, + mode_lib->vba.SurfaceHeightC, + mode_lib->vba.Read256BlockWidthY, + mode_lib->vba.Read256BlockWidthC, + mode_lib->vba.Read256BlockHeightY, + mode_lib->vba.Read256BlockHeightC, + mode_lib->vba.MacroTileWidthY, + mode_lib->vba.MacroTileWidthC, + mode_lib->vba.MacroTileHeightY, + mode_lib->vba.MacroTileHeightC, + + /* Output */ + mode_lib->vba.SurfaceSizeInMALL, + &mode_lib->vba.ExceededMALLSize); + + for (i = 0; i < v->soc.num_states; i++) { + for (j = 0; j < 2; j++) { + for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) { + mode_lib->vba.swath_width_luma_ub_this_state[k] = + mode_lib->vba.swath_width_luma_ub_all_states[i][j][k]; + mode_lib->vba.swath_width_chroma_ub_this_state[k] = + mode_lib->vba.swath_width_chroma_ub_all_states[i][j][k]; + mode_lib->vba.SwathWidthYThisState[k] = mode_lib->vba.SwathWidthYAllStates[i][j][k]; + mode_lib->vba.SwathWidthCThisState[k] = mode_lib->vba.SwathWidthCAllStates[i][j][k]; + mode_lib->vba.SwathHeightYThisState[k] = mode_lib->vba.SwathHeightYAllStates[i][j][k]; + mode_lib->vba.SwathHeightCThisState[k] = mode_lib->vba.SwathHeightCAllStates[i][j][k]; + mode_lib->vba.DETBufferSizeInKByteThisState[k] = + mode_lib->vba.DETBufferSizeInKByteAllStates[i][j][k]; + mode_lib->vba.DETBufferSizeYThisState[k] = + mode_lib->vba.DETBufferSizeYAllStates[i][j][k]; + mode_lib->vba.DETBufferSizeCThisState[k] = + mode_lib->vba.DETBufferSizeCAllStates[i][j][k]; + mode_lib->vba.RequiredDPPCLKThisState[k] = mode_lib->vba.RequiredDPPCLK[i][j][k]; + mode_lib->vba.NoOfDPPThisState[k] = mode_lib->vba.NoOfDPP[i][j][k]; + } + + mode_lib->vba.TotalNumberOfDCCActiveDPP[i][j] = 0; + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { + if (mode_lib->vba.DCCEnable[k] == true) { + mode_lib->vba.TotalNumberOfDCCActiveDPP[i][j] = + mode_lib->vba.TotalNumberOfDCCActiveDPP[i][j] + + mode_lib->vba.NoOfDPP[i][j][k]; + } + } + + + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].PixelClock = mode_lib->vba.PixelClock[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].DPPPerSurface = mode_lib->vba.NoOfDPP[i][j][k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].SourceRotation = mode_lib->vba.SourceRotation[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].ViewportHeight = mode_lib->vba.ViewportHeight[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].ViewportHeightChroma = mode_lib->vba.ViewportHeightChroma[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockWidth256BytesY = mode_lib->vba.Read256BlockWidthY[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockHeight256BytesY = mode_lib->vba.Read256BlockHeightY[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockWidth256BytesC = mode_lib->vba.Read256BlockWidthC[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockHeight256BytesC = mode_lib->vba.Read256BlockHeightC[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockWidthY = mode_lib->vba.MacroTileWidthY[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockHeightY = mode_lib->vba.MacroTileHeightY[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockWidthC = mode_lib->vba.MacroTileWidthC[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockHeightC = mode_lib->vba.MacroTileHeightC[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].InterlaceEnable = mode_lib->vba.Interlace[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].HTotal = mode_lib->vba.HTotal[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].DCCEnable = mode_lib->vba.DCCEnable[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].SourcePixelFormat = mode_lib->vba.SourcePixelFormat[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].SurfaceTiling = mode_lib->vba.SurfaceTiling[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BytePerPixelY = mode_lib->vba.BytePerPixelY[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BytePerPixelC = mode_lib->vba.BytePerPixelC[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].ProgressiveToInterlaceUnitInOPP = + mode_lib->vba.ProgressiveToInterlaceUnitInOPP; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].VRatio = mode_lib->vba.VRatio[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].VRatioChroma = mode_lib->vba.VRatioChroma[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].VTaps = mode_lib->vba.vtaps[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].VTapsChroma = mode_lib->vba.VTAPsChroma[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].PitchY = mode_lib->vba.PitchY[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].DCCMetaPitchY = mode_lib->vba.DCCMetaPitchY[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].PitchC = mode_lib->vba.PitchC[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].DCCMetaPitchC = mode_lib->vba.DCCMetaPitchC[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].ViewportStationary = mode_lib->vba.ViewportStationary[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].ViewportXStart = mode_lib->vba.ViewportXStartY[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].ViewportYStart = mode_lib->vba.ViewportYStartY[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].ViewportXStartC = mode_lib->vba.ViewportXStartC[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].ViewportYStartC = mode_lib->vba.ViewportYStartC[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].FORCE_ONE_ROW_FOR_FRAME = mode_lib->vba.ForceOneRowForFrame[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].SwathHeightY = mode_lib->vba.SwathHeightYThisState[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].SwathHeightC = mode_lib->vba.SwathHeightCThisState[k]; + } + + { + dml32_CalculateVMRowAndSwath( + mode_lib->vba.NumberOfActiveSurfaces, + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters, + mode_lib->vba.SurfaceSizeInMALL, + mode_lib->vba.PTEBufferSizeInRequestsLuma, + mode_lib->vba.PTEBufferSizeInRequestsChroma, + mode_lib->vba.DCCMetaBufferSizeBytes, + mode_lib->vba.UseMALLForStaticScreen, + mode_lib->vba.UsesMALLForPStateChange, + mode_lib->vba.MALLAllocatedForDCNFinal, + mode_lib->vba.SwathWidthYThisState, + mode_lib->vba.SwathWidthCThisState, + mode_lib->vba.GPUVMEnable, + mode_lib->vba.HostVMEnable, + mode_lib->vba.HostVMMaxNonCachedPageTableLevels, + mode_lib->vba.GPUVMMaxPageTableLevels, + mode_lib->vba.GPUVMMinPageSizeKBytes, + mode_lib->vba.HostVMMinPageSize, + + /* Output */ + mode_lib->vba.PTEBufferSizeNotExceededPerState, + mode_lib->vba.DCCMetaBufferSizeNotExceededPerState, + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[0], + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[1], + mode_lib->vba.dpte_row_height, + mode_lib->vba.dpte_row_height_chroma, + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[2], + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[3], + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[4], + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[5], + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[6], + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[7], + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[8], + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[9], + mode_lib->vba.meta_row_height, + mode_lib->vba.meta_row_height_chroma, + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[10], + mode_lib->vba.dpte_group_bytes, + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[11], + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[12], + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[13], + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[14], + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[15], + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[16], + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[17], + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[18], + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[19], + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[20], + mode_lib->vba.PrefetchLinesYThisState, + mode_lib->vba.PrefetchLinesCThisState, + mode_lib->vba.PrefillY, + mode_lib->vba.PrefillC, + mode_lib->vba.MaxNumSwY, + mode_lib->vba.MaxNumSwC, + mode_lib->vba.meta_row_bandwidth_this_state, + mode_lib->vba.dpte_row_bandwidth_this_state, + mode_lib->vba.DPTEBytesPerRowThisState, + mode_lib->vba.PDEAndMetaPTEBytesPerFrameThisState, + mode_lib->vba.MetaRowBytesThisState, + mode_lib->vba.use_one_row_for_frame_this_state, + mode_lib->vba.use_one_row_for_frame_flip_this_state, + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_boolean_array[0], // Boolean UsesMALLForStaticScreen[] + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_boolean_array[1], // Boolean PTE_BUFFER_MODE[] + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[21]); // Long BIGK_FRAGMENT_SIZE[] + } + + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { + mode_lib->vba.PrefetchLinesY[i][j][k] = mode_lib->vba.PrefetchLinesYThisState[k]; + mode_lib->vba.PrefetchLinesC[i][j][k] = mode_lib->vba.PrefetchLinesCThisState[k]; + mode_lib->vba.meta_row_bandwidth[i][j][k] = + mode_lib->vba.meta_row_bandwidth_this_state[k]; + mode_lib->vba.dpte_row_bandwidth[i][j][k] = + mode_lib->vba.dpte_row_bandwidth_this_state[k]; + mode_lib->vba.DPTEBytesPerRow[i][j][k] = mode_lib->vba.DPTEBytesPerRowThisState[k]; + mode_lib->vba.PDEAndMetaPTEBytesPerFrame[i][j][k] = + mode_lib->vba.PDEAndMetaPTEBytesPerFrameThisState[k]; + mode_lib->vba.MetaRowBytes[i][j][k] = mode_lib->vba.MetaRowBytesThisState[k]; + mode_lib->vba.use_one_row_for_frame[i][j][k] = + mode_lib->vba.use_one_row_for_frame_this_state[k]; + mode_lib->vba.use_one_row_for_frame_flip[i][j][k] = + mode_lib->vba.use_one_row_for_frame_flip_this_state[k]; + } + + mode_lib->vba.PTEBufferSizeNotExceeded[i][j] = true; + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { + if (mode_lib->vba.PTEBufferSizeNotExceededPerState[k] == false) + mode_lib->vba.PTEBufferSizeNotExceeded[i][j] = false; + } + + mode_lib->vba.DCCMetaBufferSizeNotExceeded[i][j] = true; + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { + if (mode_lib->vba.DCCMetaBufferSizeNotExceededPerState[k] == false) + mode_lib->vba.DCCMetaBufferSizeNotExceeded[i][j] = false; + } + + mode_lib->vba.UrgLatency[i] = dml32_CalculateUrgentLatency( + mode_lib->vba.UrgentLatencyPixelDataOnly, + mode_lib->vba.UrgentLatencyPixelMixedWithVMData, + mode_lib->vba.UrgentLatencyVMDataOnly, mode_lib->vba.DoUrgentLatencyAdjustment, + mode_lib->vba.UrgentLatencyAdjustmentFabricClockComponent, + mode_lib->vba.UrgentLatencyAdjustmentFabricClockReference, + mode_lib->vba.FabricClockPerState[i]); + + //bool NotUrgentLatencyHiding[DC__NUM_DPP__MAX]; + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { + dml32_CalculateUrgentBurstFactor( + mode_lib->vba.UsesMALLForPStateChange[k], + mode_lib->vba.swath_width_luma_ub_this_state[k], + mode_lib->vba.swath_width_chroma_ub_this_state[k], + mode_lib->vba.SwathHeightYThisState[k], + mode_lib->vba.SwathHeightCThisState[k], + (double) mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k], + mode_lib->vba.UrgLatency[i], + mode_lib->vba.CursorBufferSize, + mode_lib->vba.CursorWidth[k][0], + mode_lib->vba.CursorBPP[k][0], + mode_lib->vba.VRatio[k], + mode_lib->vba.VRatioChroma[k], + mode_lib->vba.BytePerPixelInDETY[k], + mode_lib->vba.BytePerPixelInDETC[k], + mode_lib->vba.DETBufferSizeYThisState[k], + mode_lib->vba.DETBufferSizeCThisState[k], + /* Output */ + &mode_lib->vba.UrgentBurstFactorCursor[k], + &mode_lib->vba.UrgentBurstFactorLuma[k], + &mode_lib->vba.UrgentBurstFactorChroma[k], + &mode_lib->vba.NoUrgentLatencyHiding[k]); + } + + dml32_CalculateDCFCLKDeepSleep( + mode_lib->vba.NumberOfActiveSurfaces, + mode_lib->vba.BytePerPixelY, + mode_lib->vba.BytePerPixelC, + mode_lib->vba.VRatio, + mode_lib->vba.VRatioChroma, + mode_lib->vba.SwathWidthYThisState, + mode_lib->vba.SwathWidthCThisState, + mode_lib->vba.NoOfDPPThisState, + mode_lib->vba.HRatio, + mode_lib->vba.HRatioChroma, + mode_lib->vba.PixelClock, + mode_lib->vba.PSCL_FACTOR, + mode_lib->vba.PSCL_FACTOR_CHROMA, + mode_lib->vba.RequiredDPPCLKThisState, + mode_lib->vba.ReadBandwidthLuma, + mode_lib->vba.ReadBandwidthChroma, + mode_lib->vba.ReturnBusWidth, + + /* Output */ + &mode_lib->vba.ProjectedDCFCLKDeepSleep[i][j]); + } + } + + //Calculate Return BW + for (i = 0; i < (int) v->soc.num_states; ++i) { + for (j = 0; j <= 1; ++j) { + for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) { + if (mode_lib->vba.BlendingAndTiming[k] == k) { + if (mode_lib->vba.WritebackEnable[k] == true) { + mode_lib->vba.WritebackDelayTime[k] = + mode_lib->vba.WritebackLatency + + dml32_CalculateWriteBackDelay( + mode_lib->vba.WritebackPixelFormat[k], + mode_lib->vba.WritebackHRatio[k], + mode_lib->vba.WritebackVRatio[k], + mode_lib->vba.WritebackVTaps[k], + mode_lib->vba.WritebackDestinationWidth[k], + mode_lib->vba.WritebackDestinationHeight[k], + mode_lib->vba.WritebackSourceHeight[k], + mode_lib->vba.HTotal[k]) + / mode_lib->vba.RequiredDISPCLK[i][j]; + } else { + mode_lib->vba.WritebackDelayTime[k] = 0.0; + } + for (m = 0; m <= mode_lib->vba.NumberOfActiveSurfaces - 1; m++) { + if (mode_lib->vba.BlendingAndTiming[m] + == k && mode_lib->vba.WritebackEnable[m] == true) { + mode_lib->vba.WritebackDelayTime[k] = + dml_max(mode_lib->vba.WritebackDelayTime[k], + mode_lib->vba.WritebackLatency + + dml32_CalculateWriteBackDelay( + mode_lib->vba.WritebackPixelFormat[m], + mode_lib->vba.WritebackHRatio[m], + mode_lib->vba.WritebackVRatio[m], + mode_lib->vba.WritebackVTaps[m], + mode_lib->vba.WritebackDestinationWidth[m], + mode_lib->vba.WritebackDestinationHeight[m], + mode_lib->vba.WritebackSourceHeight[m], + mode_lib->vba.HTotal[m]) / + mode_lib->vba.RequiredDISPCLK[i][j]); + } + } + } + } + for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) { + for (m = 0; m <= mode_lib->vba.NumberOfActiveSurfaces - 1; m++) { + if (mode_lib->vba.BlendingAndTiming[k] == m) { + mode_lib->vba.WritebackDelayTime[k] = + mode_lib->vba.WritebackDelayTime[m]; + } + } + } + mode_lib->vba.MaxMaxVStartup[i][j] = 0; + for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) { + mode_lib->vba.MaximumVStartup[i][j][k] = ((mode_lib->vba.Interlace[k] && + !mode_lib->vba.ProgressiveToInterlaceUnitInOPP) ? + dml_floor((mode_lib->vba.VTotal[k] - + mode_lib->vba.VActive[k]) / 2.0, 1.0) : + mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k]) + - dml_max(1.0, dml_ceil(1.0 * + mode_lib->vba.WritebackDelayTime[k] / + (mode_lib->vba.HTotal[k] / + mode_lib->vba.PixelClock[k]), 1.0)); + + // Clamp to max OTG vstartup register limit + if (mode_lib->vba.MaximumVStartup[i][j][k] > 1023) + mode_lib->vba.MaximumVStartup[i][j][k] = 1023; + + mode_lib->vba.MaxMaxVStartup[i][j] = dml_max(mode_lib->vba.MaxMaxVStartup[i][j], + mode_lib->vba.MaximumVStartup[i][j][k]); + } + } + } + + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ReorderingBytes = mode_lib->vba.NumberOfChannels + * dml_max3(mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelDataOnly, + mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData, + mode_lib->vba.UrgentOutOfOrderReturnPerChannelVMDataOnly); + + dml32_CalculateMinAndMaxPrefetchMode(mode_lib->vba.AllowForPStateChangeOrStutterInVBlankFinal, + &mode_lib->vba.MinPrefetchMode, + &mode_lib->vba.MaxPrefetchMode); + + for (i = 0; i < (int) v->soc.num_states; ++i) { + for (j = 0; j <= 1; ++j) + mode_lib->vba.DCFCLKState[i][j] = mode_lib->vba.DCFCLKPerState[i]; + } + + /* Immediate Flip and MALL parameters */ + mode_lib->vba.ImmediateFlipRequiredFinal = false; + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { + mode_lib->vba.ImmediateFlipRequiredFinal = mode_lib->vba.ImmediateFlipRequiredFinal + || (mode_lib->vba.ImmediateFlipRequirement[k] == dm_immediate_flip_required); + } + + mode_lib->vba.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified = false; + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { + mode_lib->vba.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified = + mode_lib->vba.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified + || ((mode_lib->vba.ImmediateFlipRequirement[k] + != dm_immediate_flip_required) + && (mode_lib->vba.ImmediateFlipRequirement[k] + != dm_immediate_flip_not_required)); + } + mode_lib->vba.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified = + mode_lib->vba.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified + && mode_lib->vba.ImmediateFlipRequiredFinal; + + mode_lib->vba.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = false; + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { + mode_lib->vba.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = + mode_lib->vba.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe || + ((mode_lib->vba.HostVMEnable == true || mode_lib->vba.ImmediateFlipRequirement[k] != + dm_immediate_flip_not_required) && + (mode_lib->vba.UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame || + mode_lib->vba.UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe)); + } + + mode_lib->vba.InvalidCombinationOfMALLUseForPStateAndStaticScreen = false; + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { + mode_lib->vba.InvalidCombinationOfMALLUseForPStateAndStaticScreen = + mode_lib->vba.InvalidCombinationOfMALLUseForPStateAndStaticScreen + || ((mode_lib->vba.UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable + || mode_lib->vba.UseMALLForStaticScreen[k] == dm_use_mall_static_screen_optimize) + && (mode_lib->vba.UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe)) + || ((mode_lib->vba.UseMALLForStaticScreen[k] == dm_use_mall_static_screen_disable + || mode_lib->vba.UseMALLForStaticScreen[k] == dm_use_mall_static_screen_optimize) + && (mode_lib->vba.UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame)); + } + + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.FullFrameMALLPStateMethod = false; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SubViewportMALLPStateMethod = false; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.PhantomPipeMALLPStateMethod = false; + + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { + if (mode_lib->vba.UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame) + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.FullFrameMALLPStateMethod = true; + if (mode_lib->vba.UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SubViewportMALLPStateMethod = true; + if (mode_lib->vba.UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.PhantomPipeMALLPStateMethod = true; + } + mode_lib->vba.InvalidCombinationOfMALLUseForPState = (v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SubViewportMALLPStateMethod + != v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.PhantomPipeMALLPStateMethod) || (v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SubViewportMALLPStateMethod && v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.FullFrameMALLPStateMethod); + + if (mode_lib->vba.UseMinimumRequiredDCFCLK == true) { + dml32_UseMinimumDCFCLK( + mode_lib->vba.UsesMALLForPStateChange, + mode_lib->vba.DRRDisplay, + mode_lib->vba.SynchronizeDRRDisplaysForUCLKPStateChangeFinal, + mode_lib->vba.MaxInterDCNTileRepeaters, + mode_lib->vba.MaxPrefetchMode, + mode_lib->vba.DRAMClockChangeLatency, + mode_lib->vba.FCLKChangeLatency, + mode_lib->vba.SREnterPlusExitTime, + mode_lib->vba.ReturnBusWidth, + mode_lib->vba.RoundTripPingLatencyCycles, + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ReorderingBytes, + mode_lib->vba.PixelChunkSizeInKByte, + mode_lib->vba.MetaChunkSize, + mode_lib->vba.GPUVMEnable, + mode_lib->vba.GPUVMMaxPageTableLevels, + mode_lib->vba.HostVMEnable, + mode_lib->vba.NumberOfActiveSurfaces, + mode_lib->vba.HostVMMinPageSize, + mode_lib->vba.HostVMMaxNonCachedPageTableLevels, + mode_lib->vba.DynamicMetadataVMEnabled, + mode_lib->vba.ImmediateFlipRequiredFinal, + mode_lib->vba.ProgressiveToInterlaceUnitInOPP, + mode_lib->vba.MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation, + mode_lib->vba.PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency, + mode_lib->vba.VTotal, + mode_lib->vba.VActive, + mode_lib->vba.DynamicMetadataTransmittedBytes, + mode_lib->vba.DynamicMetadataLinesBeforeActiveRequired, + mode_lib->vba.Interlace, + mode_lib->vba.RequiredDPPCLK, + mode_lib->vba.RequiredDISPCLK, + mode_lib->vba.UrgLatency, + mode_lib->vba.NoOfDPP, + mode_lib->vba.ProjectedDCFCLKDeepSleep, + mode_lib->vba.MaximumVStartup, + mode_lib->vba.TotalNumberOfActiveDPP, + mode_lib->vba.TotalNumberOfDCCActiveDPP, + mode_lib->vba.dpte_group_bytes, + mode_lib->vba.PrefetchLinesY, + mode_lib->vba.PrefetchLinesC, + mode_lib->vba.swath_width_luma_ub_all_states, + mode_lib->vba.swath_width_chroma_ub_all_states, + mode_lib->vba.BytePerPixelY, + mode_lib->vba.BytePerPixelC, + mode_lib->vba.HTotal, + mode_lib->vba.PixelClock, + mode_lib->vba.PDEAndMetaPTEBytesPerFrame, + mode_lib->vba.DPTEBytesPerRow, + mode_lib->vba.MetaRowBytes, + mode_lib->vba.DynamicMetadataEnable, + mode_lib->vba.ReadBandwidthLuma, + mode_lib->vba.ReadBandwidthChroma, + mode_lib->vba.DCFCLKPerState, + + /* Output */ + mode_lib->vba.DCFCLKState); + } // UseMinimumRequiredDCFCLK == true + + for (i = 0; i < (int) v->soc.num_states; ++i) { + for (j = 0; j <= 1; ++j) { + mode_lib->vba.ReturnBWPerState[i][j] = dml32_get_return_bw_mbps(&mode_lib->vba.soc, i, + mode_lib->vba.HostVMEnable, mode_lib->vba.DCFCLKState[i][j], + mode_lib->vba.FabricClockPerState[i], mode_lib->vba.DRAMSpeedPerState[i]); + } + } + + //Re-ordering Buffer Support Check + for (i = 0; i < (int) v->soc.num_states; ++i) { + for (j = 0; j <= 1; ++j) { + if ((mode_lib->vba.ROBBufferSizeInKByte - mode_lib->vba.PixelChunkSizeInKByte) * 1024 + / mode_lib->vba.ReturnBWPerState[i][j] + > (mode_lib->vba.RoundTripPingLatencyCycles + 32) + / mode_lib->vba.DCFCLKState[i][j] + + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ReorderingBytes / mode_lib->vba.ReturnBWPerState[i][j]) { + mode_lib->vba.ROBSupport[i][j] = true; + } else { + mode_lib->vba.ROBSupport[i][j] = false; + } + } + } + + //Vertical Active BW support check + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaxTotalVActiveRDBandwidth = 0; + + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaxTotalVActiveRDBandwidth += mode_lib->vba.ReadBandwidthLuma[k] + + mode_lib->vba.ReadBandwidthChroma[k]; + } + + for (i = 0; i < (int) v->soc.num_states; ++i) { + for (j = 0; j <= 1; ++j) { + mode_lib->vba.MaxTotalVerticalActiveAvailableBandwidth[i][j] = + dml_min3(mode_lib->vba.ReturnBusWidth * mode_lib->vba.DCFCLKState[i][j] + * mode_lib->vba.MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation / 100, + mode_lib->vba.FabricClockPerState[i] + * mode_lib->vba.FabricDatapathToDCNDataReturn + * mode_lib->vba.MaxAveragePercentOfIdealFabricBWDisplayCanUseInNormalSystemOperation / 100, + mode_lib->vba.DRAMSpeedPerState[i] + * mode_lib->vba.NumberOfChannels + * mode_lib->vba.DRAMChannelWidth + * (i < 2 ? mode_lib->vba.MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperationSTROBE : mode_lib->vba.MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation) / 100); + + if (v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaxTotalVActiveRDBandwidth + <= mode_lib->vba.MaxTotalVerticalActiveAvailableBandwidth[i][j]) { + mode_lib->vba.TotalVerticalActiveBandwidthSupport[i][j] = true; + } else { + mode_lib->vba.TotalVerticalActiveBandwidthSupport[i][j] = false; + } + } + } + + /* Prefetch Check */ + + for (i = 0; i < (int) v->soc.num_states; ++i) { + for (j = 0; j <= 1; ++j) { + + mode_lib->vba.TimeCalc = 24 / mode_lib->vba.ProjectedDCFCLKDeepSleep[i][j]; + + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { + mode_lib->vba.NoOfDPPThisState[k] = mode_lib->vba.NoOfDPP[i][j][k]; + mode_lib->vba.swath_width_luma_ub_this_state[k] = + mode_lib->vba.swath_width_luma_ub_all_states[i][j][k]; + mode_lib->vba.swath_width_chroma_ub_this_state[k] = + mode_lib->vba.swath_width_chroma_ub_all_states[i][j][k]; + mode_lib->vba.SwathWidthYThisState[k] = mode_lib->vba.SwathWidthYAllStates[i][j][k]; + mode_lib->vba.SwathWidthCThisState[k] = mode_lib->vba.SwathWidthCAllStates[i][j][k]; + mode_lib->vba.SwathHeightYThisState[k] = mode_lib->vba.SwathHeightYAllStates[i][j][k]; + mode_lib->vba.SwathHeightCThisState[k] = mode_lib->vba.SwathHeightCAllStates[i][j][k]; + mode_lib->vba.UnboundedRequestEnabledThisState = + mode_lib->vba.UnboundedRequestEnabledAllStates[i][j]; + mode_lib->vba.CompressedBufferSizeInkByteThisState = + mode_lib->vba.CompressedBufferSizeInkByteAllStates[i][j]; + mode_lib->vba.DETBufferSizeInKByteThisState[k] = + mode_lib->vba.DETBufferSizeInKByteAllStates[i][j][k]; + mode_lib->vba.DETBufferSizeYThisState[k] = + mode_lib->vba.DETBufferSizeYAllStates[i][j][k]; + mode_lib->vba.DETBufferSizeCThisState[k] = + mode_lib->vba.DETBufferSizeCAllStates[i][j][k]; + } + + mode_lib->vba.VActiveBandwithSupport[i][j] = dml32_CalculateVActiveBandwithSupport( + mode_lib->vba.NumberOfActiveSurfaces, + mode_lib->vba.ReturnBWPerState[i][j], + mode_lib->vba.NoUrgentLatencyHiding, + mode_lib->vba.ReadBandwidthLuma, + mode_lib->vba.ReadBandwidthChroma, + mode_lib->vba.cursor_bw, + mode_lib->vba.meta_row_bandwidth_this_state, + mode_lib->vba.dpte_row_bandwidth_this_state, + mode_lib->vba.NoOfDPPThisState, + mode_lib->vba.UrgentBurstFactorLuma, + mode_lib->vba.UrgentBurstFactorChroma, + mode_lib->vba.UrgentBurstFactorCursor); + + mode_lib->vba.NotEnoughDETSwathFillLatencyHidingPerState[i][j] = dml32_CalculateDETSwathFillLatencyHiding( + mode_lib->vba.NumberOfActiveSurfaces, + mode_lib->vba.ReturnBWPerState[i][j], + mode_lib->vba.UrgLatency[i], + mode_lib->vba.SwathHeightYThisState, + mode_lib->vba.SwathHeightCThisState, + mode_lib->vba.swath_width_luma_ub_this_state, + mode_lib->vba.swath_width_chroma_ub_this_state, + mode_lib->vba.BytePerPixelInDETY, + mode_lib->vba.BytePerPixelInDETC, + mode_lib->vba.DETBufferSizeYThisState, + mode_lib->vba.DETBufferSizeCThisState, + mode_lib->vba.NoOfDPPThisState, + mode_lib->vba.HTotal, + mode_lib->vba.PixelClock, + mode_lib->vba.VRatio, + mode_lib->vba.VRatioChroma, + mode_lib->vba.UsesMALLForPStateChange); + + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.VMDataOnlyReturnBWPerState = dml32_get_return_bw_mbps_vm_only(&mode_lib->vba.soc, i, + mode_lib->vba.DCFCLKState[i][j], mode_lib->vba.FabricClockPerState[i], + mode_lib->vba.DRAMSpeedPerState[i]); + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.HostVMInefficiencyFactor = 1; + + if (mode_lib->vba.GPUVMEnable && mode_lib->vba.HostVMEnable) + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.HostVMInefficiencyFactor = mode_lib->vba.ReturnBWPerState[i][j] + / v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.VMDataOnlyReturnBWPerState; + + mode_lib->vba.ExtraLatency = dml32_CalculateExtraLatency( + mode_lib->vba.RoundTripPingLatencyCycles, v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ReorderingBytes, + mode_lib->vba.DCFCLKState[i][j], mode_lib->vba.TotalNumberOfActiveDPP[i][j], + mode_lib->vba.PixelChunkSizeInKByte, + mode_lib->vba.TotalNumberOfDCCActiveDPP[i][j], mode_lib->vba.MetaChunkSize, + mode_lib->vba.ReturnBWPerState[i][j], mode_lib->vba.GPUVMEnable, + mode_lib->vba.HostVMEnable, mode_lib->vba.NumberOfActiveSurfaces, + mode_lib->vba.NoOfDPPThisState, mode_lib->vba.dpte_group_bytes, + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.HostVMInefficiencyFactor, mode_lib->vba.HostVMMinPageSize, + mode_lib->vba.HostVMMaxNonCachedPageTableLevels); + + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NextPrefetchModeState = mode_lib->vba.MinPrefetchMode; + + mode_lib->vba.NextMaxVStartup = mode_lib->vba.MaxMaxVStartup[i][j]; + + do { + mode_lib->vba.PrefetchModePerState[i][j] = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NextPrefetchModeState; + mode_lib->vba.MaxVStartup = mode_lib->vba.NextMaxVStartup; + + for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) { + mode_lib->vba.TWait = dml32_CalculateTWait( + mode_lib->vba.PrefetchModePerState[i][j], + mode_lib->vba.UsesMALLForPStateChange[k], + mode_lib->vba.SynchronizeDRRDisplaysForUCLKPStateChangeFinal, + mode_lib->vba.DRRDisplay[k], + mode_lib->vba.DRAMClockChangeLatency, + mode_lib->vba.FCLKChangeLatency, mode_lib->vba.UrgLatency[i], + mode_lib->vba.SREnterPlusExitTime); + + memset(&v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull, 0, sizeof(DmlPipe)); + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.Dppclk = mode_lib->vba.RequiredDPPCLK[i][j][k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.Dispclk = mode_lib->vba.RequiredDISPCLK[i][j]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.PixelClock = mode_lib->vba.PixelClock[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.DCFClkDeepSleep = mode_lib->vba.ProjectedDCFCLKDeepSleep[i][j]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.DPPPerSurface = mode_lib->vba.NoOfDPP[i][j][k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.ScalerEnabled = mode_lib->vba.ScalerEnabled[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.SourceRotation = mode_lib->vba.SourceRotation[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.BlockWidth256BytesY = mode_lib->vba.Read256BlockWidthY[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.BlockHeight256BytesY = mode_lib->vba.Read256BlockHeightY[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.BlockWidth256BytesC = mode_lib->vba.Read256BlockWidthC[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.BlockHeight256BytesC = mode_lib->vba.Read256BlockHeightC[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.InterlaceEnable = mode_lib->vba.Interlace[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.NumberOfCursors = mode_lib->vba.NumberOfCursors[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.VBlank = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.HTotal = mode_lib->vba.HTotal[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.HActive = mode_lib->vba.HActive[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.DCCEnable = mode_lib->vba.DCCEnable[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.ODMMode = mode_lib->vba.ODMCombineEnablePerState[i][k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.SourcePixelFormat = mode_lib->vba.SourcePixelFormat[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.BytePerPixelY = mode_lib->vba.BytePerPixelY[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.BytePerPixelC = mode_lib->vba.BytePerPixelC[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.ProgressiveToInterlaceUnitInOPP = + mode_lib->vba.ProgressiveToInterlaceUnitInOPP; + + mode_lib->vba.NoTimeForPrefetch[i][j][k] = + dml32_CalculatePrefetchSchedule( + v, + k, + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.HostVMInefficiencyFactor, + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe, + v->DSCDelayPerState[i][k], + v->SwathWidthYThisState[k] / v->HRatio[k], + dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]), + v->MaximumVStartup[i][j][k], + v->UrgLatency[i], + v->ExtraLatency, + v->TimeCalc, + v->PDEAndMetaPTEBytesPerFrame[i][j][k], + v->MetaRowBytes[i][j][k], + v->DPTEBytesPerRow[i][j][k], + v->PrefetchLinesY[i][j][k], + v->SwathWidthYThisState[k], + v->PrefillY[k], + v->MaxNumSwY[k], + v->PrefetchLinesC[i][j][k], + v->SwathWidthCThisState[k], + v->PrefillC[k], + v->MaxNumSwC[k], + v->swath_width_luma_ub_this_state[k], + v->swath_width_chroma_ub_this_state[k], + v->SwathHeightYThisState[k], + v->SwathHeightCThisState[k], v->TWait, + (v->DRAMSpeedPerState[i] <= MEM_STROBE_FREQ_MHZ || v->DCFCLKState[i][j] <= DCFCLK_FREQ_EXTRA_PREFETCH_REQ_MHZ) ? + mode_lib->vba.ip.min_prefetch_in_strobe_us : 0, + + /* Output */ + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.DSTXAfterScaler[k], + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.DSTYAfterScaler[k], + &v->LineTimesForPrefetch[k], + &v->PrefetchBW[k], + &v->LinesForMetaPTE[k], + &v->LinesForMetaAndDPTERow[k], + &v->VRatioPreY[i][j][k], + &v->VRatioPreC[i][j][k], + &v->RequiredPrefetchPixelDataBWLuma[0][0][k], + &v->RequiredPrefetchPixelDataBWChroma[0][0][k], + &v->NoTimeForDynamicMetadata[i][j][k], + &v->Tno_bw[k], + &v->prefetch_vmrow_bw[k], + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[0], // double *Tdmdl_vm + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[1], // double *Tdmdl + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[2], // double *TSetup + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer[0], // unsigned int *VUpdateOffsetPix + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[3], // unsigned int *VUpdateWidthPix + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[4]); // unsigned int *VReadyOffsetPix + } + + for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) { + dml32_CalculateUrgentBurstFactor( + mode_lib->vba.UsesMALLForPStateChange[k], + mode_lib->vba.swath_width_luma_ub_this_state[k], + mode_lib->vba.swath_width_chroma_ub_this_state[k], + mode_lib->vba.SwathHeightYThisState[k], + mode_lib->vba.SwathHeightCThisState[k], + mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k], + mode_lib->vba.UrgLatency[i], mode_lib->vba.CursorBufferSize, + mode_lib->vba.CursorWidth[k][0], mode_lib->vba.CursorBPP[k][0], + mode_lib->vba.VRatioPreY[i][j][k], + mode_lib->vba.VRatioPreC[i][j][k], + mode_lib->vba.BytePerPixelInDETY[k], + mode_lib->vba.BytePerPixelInDETC[k], + mode_lib->vba.DETBufferSizeYThisState[k], + mode_lib->vba.DETBufferSizeCThisState[k], + /* Output */ + &mode_lib->vba.UrgentBurstFactorCursorPre[k], + &mode_lib->vba.UrgentBurstFactorLumaPre[k], + &mode_lib->vba.UrgentBurstFactorChromaPre[k], + &mode_lib->vba.NotUrgentLatencyHidingPre[k]); + } + + { + dml32_CalculatePrefetchBandwithSupport( + mode_lib->vba.NumberOfActiveSurfaces, + mode_lib->vba.ReturnBWPerState[i][j], + mode_lib->vba.NotUrgentLatencyHidingPre, + mode_lib->vba.ReadBandwidthLuma, + mode_lib->vba.ReadBandwidthChroma, + mode_lib->vba.RequiredPrefetchPixelDataBWLuma[0][0], + mode_lib->vba.RequiredPrefetchPixelDataBWChroma[0][0], + mode_lib->vba.cursor_bw, + mode_lib->vba.meta_row_bandwidth_this_state, + mode_lib->vba.dpte_row_bandwidth_this_state, + mode_lib->vba.cursor_bw_pre, + mode_lib->vba.prefetch_vmrow_bw, + mode_lib->vba.NoOfDPPThisState, + mode_lib->vba.UrgentBurstFactorLuma, + mode_lib->vba.UrgentBurstFactorChroma, + mode_lib->vba.UrgentBurstFactorCursor, + mode_lib->vba.UrgentBurstFactorLumaPre, + mode_lib->vba.UrgentBurstFactorChromaPre, + mode_lib->vba.UrgentBurstFactorCursorPre, + + /* output */ + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[0], // Single *PrefetchBandwidth + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[1], // Single *FractionOfUrgentBandwidth + &mode_lib->vba.PrefetchSupported[i][j]); + } + + for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) { + if (mode_lib->vba.LineTimesForPrefetch[k] + < 2.0 || mode_lib->vba.LinesForMetaPTE[k] >= 32.0 + || mode_lib->vba.LinesForMetaAndDPTERow[k] >= 16.0 + || mode_lib->vba.NoTimeForPrefetch[i][j][k] == true) { + mode_lib->vba.PrefetchSupported[i][j] = false; + } + } + + mode_lib->vba.DynamicMetadataSupported[i][j] = true; + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { + if (mode_lib->vba.NoTimeForDynamicMetadata[i][j][k] == true) + mode_lib->vba.DynamicMetadataSupported[i][j] = false; + } + + mode_lib->vba.VRatioInPrefetchSupported[i][j] = true; + for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) { + if (mode_lib->vba.VRatioPreY[i][j][k] > __DML_MAX_VRATIO_PRE__ + || mode_lib->vba.VRatioPreC[i][j][k] > __DML_MAX_VRATIO_PRE__ + || mode_lib->vba.NoTimeForPrefetch[i][j][k] == true) { + mode_lib->vba.VRatioInPrefetchSupported[i][j] = false; + } + } + mode_lib->vba.AnyLinesForVMOrRowTooLarge = false; + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { + if (mode_lib->vba.LinesForMetaAndDPTERow[k] >= 16 + || mode_lib->vba.LinesForMetaPTE[k] >= 32) { + mode_lib->vba.AnyLinesForVMOrRowTooLarge = true; + } + } + + if (mode_lib->vba.PrefetchSupported[i][j] == true + && mode_lib->vba.VRatioInPrefetchSupported[i][j] == true) { + mode_lib->vba.BandwidthAvailableForImmediateFlip = + dml32_CalculateBandwidthAvailableForImmediateFlip( + mode_lib->vba.NumberOfActiveSurfaces, + mode_lib->vba.ReturnBWPerState[i][j], + mode_lib->vba.ReadBandwidthLuma, + mode_lib->vba.ReadBandwidthChroma, + mode_lib->vba.RequiredPrefetchPixelDataBWLuma[0][0], + mode_lib->vba.RequiredPrefetchPixelDataBWChroma[0][0], + mode_lib->vba.cursor_bw, + mode_lib->vba.cursor_bw_pre, + mode_lib->vba.NoOfDPPThisState, + mode_lib->vba.UrgentBurstFactorLuma, + mode_lib->vba.UrgentBurstFactorChroma, + mode_lib->vba.UrgentBurstFactorCursor, + mode_lib->vba.UrgentBurstFactorLumaPre, + mode_lib->vba.UrgentBurstFactorChromaPre, + mode_lib->vba.UrgentBurstFactorCursorPre); + + mode_lib->vba.TotImmediateFlipBytes = 0.0; + for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) { + if (!(mode_lib->vba.ImmediateFlipRequirement[k] == + dm_immediate_flip_not_required)) { + mode_lib->vba.TotImmediateFlipBytes = + mode_lib->vba.TotImmediateFlipBytes + + mode_lib->vba.NoOfDPP[i][j][k] + * mode_lib->vba.PDEAndMetaPTEBytesPerFrame[i][j][k] + + mode_lib->vba.MetaRowBytes[i][j][k]; + if (mode_lib->vba.use_one_row_for_frame_flip[i][j][k]) { + mode_lib->vba.TotImmediateFlipBytes = + mode_lib->vba.TotImmediateFlipBytes + 2 + * mode_lib->vba.DPTEBytesPerRow[i][j][k]; + } else { + mode_lib->vba.TotImmediateFlipBytes = + mode_lib->vba.TotImmediateFlipBytes + + mode_lib->vba.DPTEBytesPerRow[i][j][k]; + } + } + } + + for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) { + dml32_CalculateFlipSchedule(v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.HostVMInefficiencyFactor, + mode_lib->vba.ExtraLatency, + mode_lib->vba.UrgLatency[i], + mode_lib->vba.GPUVMMaxPageTableLevels, + mode_lib->vba.HostVMEnable, + mode_lib->vba.HostVMMaxNonCachedPageTableLevels, + mode_lib->vba.GPUVMEnable, + mode_lib->vba.HostVMMinPageSize, + mode_lib->vba.PDEAndMetaPTEBytesPerFrame[i][j][k], + mode_lib->vba.MetaRowBytes[i][j][k], + mode_lib->vba.DPTEBytesPerRow[i][j][k], + mode_lib->vba.BandwidthAvailableForImmediateFlip, + mode_lib->vba.TotImmediateFlipBytes, + mode_lib->vba.SourcePixelFormat[k], + (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]), + mode_lib->vba.VRatio[k], + mode_lib->vba.VRatioChroma[k], + mode_lib->vba.Tno_bw[k], + mode_lib->vba.DCCEnable[k], + mode_lib->vba.dpte_row_height[k], + mode_lib->vba.meta_row_height[k], + mode_lib->vba.dpte_row_height_chroma[k], + mode_lib->vba.meta_row_height_chroma[k], + mode_lib->vba.use_one_row_for_frame_flip[i][j][k], // 24 + + /* Output */ + &mode_lib->vba.DestinationLinesToRequestVMInImmediateFlip[k], + &mode_lib->vba.DestinationLinesToRequestRowInImmediateFlip[k], + &mode_lib->vba.final_flip_bw[k], + &mode_lib->vba.ImmediateFlipSupportedForPipe[k]); + } + + { + dml32_CalculateImmediateFlipBandwithSupport(mode_lib->vba.NumberOfActiveSurfaces, + mode_lib->vba.ReturnBWPerState[i][j], + mode_lib->vba.ImmediateFlipRequirement, + mode_lib->vba.final_flip_bw, + mode_lib->vba.ReadBandwidthLuma, + mode_lib->vba.ReadBandwidthChroma, + mode_lib->vba.RequiredPrefetchPixelDataBWLuma[0][0], + mode_lib->vba.RequiredPrefetchPixelDataBWChroma[0][0], + mode_lib->vba.cursor_bw, + mode_lib->vba.meta_row_bandwidth_this_state, + mode_lib->vba.dpte_row_bandwidth_this_state, + mode_lib->vba.cursor_bw_pre, + mode_lib->vba.prefetch_vmrow_bw, + mode_lib->vba.DPPPerPlane, + mode_lib->vba.UrgentBurstFactorLuma, + mode_lib->vba.UrgentBurstFactorChroma, + mode_lib->vba.UrgentBurstFactorCursor, + mode_lib->vba.UrgentBurstFactorLumaPre, + mode_lib->vba.UrgentBurstFactorChromaPre, + mode_lib->vba.UrgentBurstFactorCursorPre, + + /* output */ + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[0], // Single *TotalBandwidth + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[1], // Single *FractionOfUrgentBandwidth + &mode_lib->vba.ImmediateFlipSupportedForState[i][j]); // Boolean *ImmediateFlipBandwidthSupport + } + + for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) { + if (!(mode_lib->vba.ImmediateFlipRequirement[k] + == dm_immediate_flip_not_required) + && (mode_lib->vba.ImmediateFlipSupportedForPipe[k] + == false)) + mode_lib->vba.ImmediateFlipSupportedForState[i][j] = false; + } + } else { // if prefetch not support, assume iflip not supported + mode_lib->vba.ImmediateFlipSupportedForState[i][j] = false; + } + + if (mode_lib->vba.MaxVStartup <= __DML_VBA_MIN_VSTARTUP__ + || mode_lib->vba.AnyLinesForVMOrRowTooLarge == false) { + mode_lib->vba.NextMaxVStartup = mode_lib->vba.MaxMaxVStartup[i][j]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NextPrefetchModeState = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NextPrefetchModeState + 1; + } else { + mode_lib->vba.NextMaxVStartup = mode_lib->vba.NextMaxVStartup - 1; + } + } while (!((mode_lib->vba.PrefetchSupported[i][j] == true + && mode_lib->vba.DynamicMetadataSupported[i][j] == true + && mode_lib->vba.VRatioInPrefetchSupported[i][j] == true && + // consider flip support is okay if when there is no hostvm and the + // user does't require a iflip OR the flip bw is ok + // If there is hostvm, DCN needs to support iflip for invalidation + ((mode_lib->vba.HostVMEnable == false + && !mode_lib->vba.ImmediateFlipRequiredFinal) + || mode_lib->vba.ImmediateFlipSupportedForState[i][j] == true)) + || (mode_lib->vba.NextMaxVStartup == mode_lib->vba.MaxMaxVStartup[i][j] + && v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NextPrefetchModeState > mode_lib->vba.MaxPrefetchMode))); + + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { + mode_lib->vba.use_one_row_for_frame_this_state[k] = + mode_lib->vba.use_one_row_for_frame[i][j][k]; + } + + + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.mSOCParameters.UrgentLatency = mode_lib->vba.UrgLatency[i]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.mSOCParameters.ExtraLatency = mode_lib->vba.ExtraLatency; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.mSOCParameters.WritebackLatency = mode_lib->vba.WritebackLatency; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.mSOCParameters.DRAMClockChangeLatency = mode_lib->vba.DRAMClockChangeLatency; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.mSOCParameters.FCLKChangeLatency = mode_lib->vba.FCLKChangeLatency; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.mSOCParameters.SRExitTime = mode_lib->vba.SRExitTime; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.mSOCParameters.SREnterPlusExitTime = mode_lib->vba.SREnterPlusExitTime; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.mSOCParameters.SRExitZ8Time = mode_lib->vba.SRExitZ8Time; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.mSOCParameters.SREnterPlusExitZ8Time = mode_lib->vba.SREnterPlusExitZ8Time; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.mSOCParameters.USRRetrainingLatency = mode_lib->vba.USRRetrainingLatency; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.mSOCParameters.SMNLatency = mode_lib->vba.SMNLatency; + + { + dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( + v, + v->PrefetchModePerState[i][j], + v->DCFCLKState[i][j], + v->ReturnBWPerState[i][j], + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.mSOCParameters, + v->SOCCLKPerState[i], + v->ProjectedDCFCLKDeepSleep[i][j], + v->DETBufferSizeYThisState, + v->DETBufferSizeCThisState, + v->SwathHeightYThisState, + v->SwathHeightCThisState, + v->SwathWidthYThisState, // 24 + v->SwathWidthCThisState, + v->NoOfDPPThisState, + v->BytePerPixelInDETY, + v->BytePerPixelInDETC, + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.DSTXAfterScaler, + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.DSTYAfterScaler, + v->UnboundedRequestEnabledThisState, + v->CompressedBufferSizeInkByteThisState, + + /* Output */ + &v->DRAMClockChangeSupport[i][j], + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single2[0], // double *MaxActiveDRAMClockChangeLatencySupported + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer[0], // Long SubViewportLinesNeededInMALL[] + &v->FCLKChangeSupport[i][j], + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single2[1], // double *MinActiveFCLKChangeLatencySupported + &mode_lib->vba.USRRetrainingSupport[i][j], + mode_lib->vba.ActiveDRAMClockChangeLatencyMarginPerState[i][j]); + } + } + } // End of Prefetch Check + + /*Cursor Support Check*/ + mode_lib->vba.CursorSupport = true; + for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) { + if (mode_lib->vba.CursorWidth[k][0] > 0.0) { + if (mode_lib->vba.CursorBPP[k][0] == 64 && mode_lib->vba.Cursor64BppSupport == false) + mode_lib->vba.CursorSupport = false; + } + } + + /*Valid Pitch Check*/ + mode_lib->vba.PitchSupport = true; + for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) { + mode_lib->vba.AlignedYPitch[k] = dml_ceil( + dml_max(mode_lib->vba.PitchY[k], mode_lib->vba.SurfaceWidthY[k]), + mode_lib->vba.MacroTileWidthY[k]); + if (mode_lib->vba.DCCEnable[k] == true) { + mode_lib->vba.AlignedDCCMetaPitchY[k] = dml_ceil( + dml_max(mode_lib->vba.DCCMetaPitchY[k], mode_lib->vba.SurfaceWidthY[k]), + 64.0 * mode_lib->vba.Read256BlockWidthY[k]); + } else { + mode_lib->vba.AlignedDCCMetaPitchY[k] = mode_lib->vba.DCCMetaPitchY[k]; + } + if (mode_lib->vba.SourcePixelFormat[k] != dm_444_64 && mode_lib->vba.SourcePixelFormat[k] != dm_444_32 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_16 + && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16 + && mode_lib->vba.SourcePixelFormat[k] != dm_rgbe + && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8) { + mode_lib->vba.AlignedCPitch[k] = dml_ceil( + dml_max(mode_lib->vba.PitchC[k], mode_lib->vba.SurfaceWidthC[k]), + mode_lib->vba.MacroTileWidthC[k]); + if (mode_lib->vba.DCCEnable[k] == true) { + mode_lib->vba.AlignedDCCMetaPitchC[k] = dml_ceil( + dml_max(mode_lib->vba.DCCMetaPitchC[k], + mode_lib->vba.SurfaceWidthC[k]), + 64.0 * mode_lib->vba.Read256BlockWidthC[k]); + } else { + mode_lib->vba.AlignedDCCMetaPitchC[k] = mode_lib->vba.DCCMetaPitchC[k]; + } + } else { + mode_lib->vba.AlignedCPitch[k] = mode_lib->vba.PitchC[k]; + mode_lib->vba.AlignedDCCMetaPitchC[k] = mode_lib->vba.DCCMetaPitchC[k]; + } + if (mode_lib->vba.AlignedYPitch[k] > mode_lib->vba.PitchY[k] + || mode_lib->vba.AlignedCPitch[k] > mode_lib->vba.PitchC[k] + || mode_lib->vba.AlignedDCCMetaPitchY[k] > mode_lib->vba.DCCMetaPitchY[k] + || mode_lib->vba.AlignedDCCMetaPitchC[k] > mode_lib->vba.DCCMetaPitchC[k]) { + mode_lib->vba.PitchSupport = false; + } + } + + mode_lib->vba.ViewportExceedsSurface = false; + for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) { + if (mode_lib->vba.ViewportWidth[k] > mode_lib->vba.SurfaceWidthY[k] + || mode_lib->vba.ViewportHeight[k] > mode_lib->vba.SurfaceHeightY[k]) { + mode_lib->vba.ViewportExceedsSurface = true; + if (mode_lib->vba.SourcePixelFormat[k] != dm_444_64 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_32 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_16 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_16 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_8 + && mode_lib->vba.SourcePixelFormat[k] != dm_rgbe) { + if (mode_lib->vba.ViewportWidthChroma[k] > mode_lib->vba.SurfaceWidthC[k] + || mode_lib->vba.ViewportHeightChroma[k] + > mode_lib->vba.SurfaceHeightC[k]) { + mode_lib->vba.ViewportExceedsSurface = true; + } + } + } + } + + /*Mode Support, Voltage State and SOC Configuration*/ + mode_support_configuration(v, mode_lib); + + MaximumMPCCombine = 0; + + for (i = v->soc.num_states; i >= 0; i--) { + if (i == v->soc.num_states || mode_lib->vba.ModeSupport[i][0] == true || + mode_lib->vba.ModeSupport[i][1] == true) { + mode_lib->vba.VoltageLevel = i; + mode_lib->vba.ModeIsSupported = mode_lib->vba.ModeSupport[i][0] == true + || mode_lib->vba.ModeSupport[i][1] == true; + + if (mode_lib->vba.ModeSupport[i][0] == true) + MaximumMPCCombine = 0; + else + MaximumMPCCombine = 1; + } + } + + mode_lib->vba.ImmediateFlipSupport = + mode_lib->vba.ImmediateFlipSupportedForState[mode_lib->vba.VoltageLevel][MaximumMPCCombine]; + mode_lib->vba.UnboundedRequestEnabled = + mode_lib->vba.UnboundedRequestEnabledAllStates[mode_lib->vba.VoltageLevel][MaximumMPCCombine]; + mode_lib->vba.CompressedBufferSizeInkByte = + mode_lib->vba.CompressedBufferSizeInkByteAllStates[mode_lib->vba.VoltageLevel][MaximumMPCCombine]; // Not used, informational + + for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) { + mode_lib->vba.MPCCombineEnable[k] = + mode_lib->vba.MPCCombine[mode_lib->vba.VoltageLevel][MaximumMPCCombine][k]; + mode_lib->vba.DPPPerPlane[k] = mode_lib->vba.NoOfDPP[mode_lib->vba.VoltageLevel][MaximumMPCCombine][k]; + mode_lib->vba.SwathHeightY[k] = + mode_lib->vba.SwathHeightYAllStates[mode_lib->vba.VoltageLevel][MaximumMPCCombine][k]; + mode_lib->vba.SwathHeightC[k] = + mode_lib->vba.SwathHeightCAllStates[mode_lib->vba.VoltageLevel][MaximumMPCCombine][k]; + mode_lib->vba.DETBufferSizeInKByte[k] = + mode_lib->vba.DETBufferSizeInKByteAllStates[mode_lib->vba.VoltageLevel][MaximumMPCCombine][k]; + mode_lib->vba.DETBufferSizeY[k] = + mode_lib->vba.DETBufferSizeYAllStates[mode_lib->vba.VoltageLevel][MaximumMPCCombine][k]; + mode_lib->vba.DETBufferSizeC[k] = + mode_lib->vba.DETBufferSizeCAllStates[mode_lib->vba.VoltageLevel][MaximumMPCCombine][k]; + mode_lib->vba.OutputType[k] = mode_lib->vba.OutputTypePerState[mode_lib->vba.VoltageLevel][k]; + mode_lib->vba.OutputRate[k] = mode_lib->vba.OutputRatePerState[mode_lib->vba.VoltageLevel][k]; + } + + mode_lib->vba.DCFCLK = mode_lib->vba.DCFCLKState[mode_lib->vba.VoltageLevel][MaximumMPCCombine]; + mode_lib->vba.DRAMSpeed = mode_lib->vba.DRAMSpeedPerState[mode_lib->vba.VoltageLevel]; + mode_lib->vba.FabricClock = mode_lib->vba.FabricClockPerState[mode_lib->vba.VoltageLevel]; + mode_lib->vba.SOCCLK = mode_lib->vba.SOCCLKPerState[mode_lib->vba.VoltageLevel]; + mode_lib->vba.ReturnBW = mode_lib->vba.ReturnBWPerState[mode_lib->vba.VoltageLevel][MaximumMPCCombine]; + mode_lib->vba.DISPCLK = mode_lib->vba.RequiredDISPCLK[mode_lib->vba.VoltageLevel][MaximumMPCCombine]; + mode_lib->vba.maxMpcComb = MaximumMPCCombine; + + for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) { + if (mode_lib->vba.BlendingAndTiming[k] == k) { + mode_lib->vba.ODMCombineEnabled[k] = + mode_lib->vba.ODMCombineEnablePerState[mode_lib->vba.VoltageLevel][k]; + } else { + mode_lib->vba.ODMCombineEnabled[k] = dm_odm_combine_mode_disabled; + } + + mode_lib->vba.DSCEnabled[k] = mode_lib->vba.RequiresDSC[mode_lib->vba.VoltageLevel][k]; + mode_lib->vba.FECEnable[k] = mode_lib->vba.RequiresFEC[mode_lib->vba.VoltageLevel][k]; + mode_lib->vba.OutputBpp[k] = mode_lib->vba.OutputBppPerState[mode_lib->vba.VoltageLevel][k]; + } + + mode_lib->vba.UrgentWatermark = mode_lib->vba.Watermark.UrgentWatermark; + mode_lib->vba.StutterEnterPlusExitWatermark = mode_lib->vba.Watermark.StutterEnterPlusExitWatermark; + mode_lib->vba.StutterExitWatermark = mode_lib->vba.Watermark.StutterExitWatermark; + mode_lib->vba.WritebackDRAMClockChangeWatermark = mode_lib->vba.Watermark.WritebackDRAMClockChangeWatermark; + mode_lib->vba.DRAMClockChangeWatermark = mode_lib->vba.Watermark.DRAMClockChangeWatermark; + mode_lib->vba.UrgentLatency = mode_lib->vba.UrgLatency[mode_lib->vba.VoltageLevel]; + mode_lib->vba.DCFCLKDeepSleep = mode_lib->vba.ProjectedDCFCLKDeepSleep[mode_lib->vba.VoltageLevel][MaximumMPCCombine]; + + /* VBA has Error type to Error Msg output here, but not necessary for DML-C */ +} // ModeSupportAndSystemConfigurationFull diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.h new file mode 100644 index 000000000..a475775bc --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.h @@ -0,0 +1,63 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DML32_DISPLAY_MODE_VBA_H__ +#define __DML32_DISPLAY_MODE_VBA_H__ + +#include "../display_mode_enums.h" + +// To enable a lot of debug msg +//#define __DML_VBA_DEBUG__ +// For DML-C changes that hasn't been propagated to VBA yet +//#define __DML_VBA_ALLOW_DELTA__ + +// Move these to ip parameters/constant +// At which vstartup the DML start to try if the mode can be supported +#define __DML_VBA_MIN_VSTARTUP__ 9 + +// Delay in DCFCLK from ARB to DET (1st num is ARB to SDPIF, 2nd number is SDPIF to DET) +#define __DML_ARB_TO_RET_DELAY__ 7 + 95 + +// fudge factor for min dcfclk calclation +#define __DML_MIN_DCFCLK_FACTOR__ 1.15 + +// Prefetch schedule max vratio +#define __DML_MAX_VRATIO_PRE__ 4.0 + +#define __DML_VBA_MAX_DST_Y_PRE__ 63.75 + +#define BPP_INVALID 0 +#define BPP_BLENDED_PIPE 0xffffffff + +#define MEM_STROBE_FREQ_MHZ 1600 +#define DCFCLK_FREQ_EXTRA_PREFETCH_REQ_MHZ 300 +#define MEM_STROBE_MAX_DELIVERY_TIME_US 60.0 + +struct display_mode_lib; + +void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib); +void dml32_recalculate(struct display_mode_lib *mode_lib); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c new file mode 100644 index 000000000..23e4be2ad --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c @@ -0,0 +1,6304 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ +#include "display_mode_vba_util_32.h" +#include "../dml_inline_defs.h" +#include "display_mode_vba_32.h" +#include "../display_mode_lib.h" + +#define DCN32_MAX_FMT_420_BUFFER_WIDTH 4096 + +unsigned int dml32_dscceComputeDelay( + unsigned int bpc, + double BPP, + unsigned int sliceWidth, + unsigned int numSlices, + enum output_format_class pixelFormat, + enum output_encoder_class Output) +{ + // valid bpc = source bits per component in the set of {8, 10, 12} + // valid bpp = increments of 1/16 of a bit + // min = 6/7/8 in N420/N422/444, respectively + // max = such that compression is 1:1 + //valid sliceWidth = number of pixels per slice line, + // must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode) + //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4} + //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420} + + // fixed value + unsigned int rcModelSize = 8192; + + // N422/N420 operate at 2 pixels per clock + unsigned int pixelsPerClock, lstall, D, initalXmitDelay, w, s, ix, wx, p, l0, a, ax, L, + Delay, pixels; + + if (pixelFormat == dm_420) + pixelsPerClock = 2; + else if (pixelFormat == dm_n422) + pixelsPerClock = 2; + // #all other modes operate at 1 pixel per clock + else + pixelsPerClock = 1; + + //initial transmit delay as per PPS + initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock); + + //compute ssm delay + if (bpc == 8) + D = 81; + else if (bpc == 10) + D = 89; + else + D = 113; + + //divide by pixel per cycle to compute slice width as seen by DSC + w = sliceWidth / pixelsPerClock; + + //422 mode has an additional cycle of delay + if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422) + s = 0; + else + s = 1; + + //main calculation for the dscce + ix = initalXmitDelay + 45; + wx = (w + 2) / 3; + p = 3 * wx - w; + l0 = ix / w; + a = ix + p * l0; + ax = (a + 2) / 3 + D + 6 + 1; + L = (ax + wx - 1) / wx; + if ((ix % w) == 0 && p != 0) + lstall = 1; + else + lstall = 0; + Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22; + + //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels + pixels = Delay * 3 * pixelsPerClock; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: bpc: %d\n", __func__, bpc); + dml_print("DML::%s: BPP: %f\n", __func__, BPP); + dml_print("DML::%s: sliceWidth: %d\n", __func__, sliceWidth); + dml_print("DML::%s: numSlices: %d\n", __func__, numSlices); + dml_print("DML::%s: pixelFormat: %d\n", __func__, pixelFormat); + dml_print("DML::%s: Output: %d\n", __func__, Output); + dml_print("DML::%s: pixels: %d\n", __func__, pixels); +#endif + + return pixels; +} + +unsigned int dml32_dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output) +{ + unsigned int Delay = 0; + + if (pixelFormat == dm_420) { + // sfr + Delay = Delay + 2; + // dsccif + Delay = Delay + 0; + // dscc - input deserializer + Delay = Delay + 3; + // dscc gets pixels every other cycle + Delay = Delay + 2; + // dscc - input cdc fifo + Delay = Delay + 12; + // dscc gets pixels every other cycle + Delay = Delay + 13; + // dscc - cdc uncertainty + Delay = Delay + 2; + // dscc - output cdc fifo + Delay = Delay + 7; + // dscc gets pixels every other cycle + Delay = Delay + 3; + // dscc - cdc uncertainty + Delay = Delay + 2; + // dscc - output serializer + Delay = Delay + 1; + // sft + Delay = Delay + 1; + } else if (pixelFormat == dm_n422 || (pixelFormat != dm_444)) { + // sfr + Delay = Delay + 2; + // dsccif + Delay = Delay + 1; + // dscc - input deserializer + Delay = Delay + 5; + // dscc - input cdc fifo + Delay = Delay + 25; + // dscc - cdc uncertainty + Delay = Delay + 2; + // dscc - output cdc fifo + Delay = Delay + 10; + // dscc - cdc uncertainty + Delay = Delay + 2; + // dscc - output serializer + Delay = Delay + 1; + // sft + Delay = Delay + 1; + } else { + // sfr + Delay = Delay + 2; + // dsccif + Delay = Delay + 0; + // dscc - input deserializer + Delay = Delay + 3; + // dscc - input cdc fifo + Delay = Delay + 12; + // dscc - cdc uncertainty + Delay = Delay + 2; + // dscc - output cdc fifo + Delay = Delay + 7; + // dscc - output serializer + Delay = Delay + 1; + // dscc - cdc uncertainty + Delay = Delay + 2; + // sft + Delay = Delay + 1; + } + + return Delay; +} + + +bool IsVertical(enum dm_rotation_angle Scan) +{ + bool is_vert = false; + + if (Scan == dm_rotation_90 || Scan == dm_rotation_90m || Scan == dm_rotation_270 || Scan == dm_rotation_270m) + is_vert = true; + else + is_vert = false; + return is_vert; +} + +void dml32_CalculateSinglePipeDPPCLKAndSCLThroughput( + double HRatio, + double HRatioChroma, + double VRatio, + double VRatioChroma, + double MaxDCHUBToPSCLThroughput, + double MaxPSCLToLBThroughput, + double PixelClock, + enum source_format_class SourcePixelFormat, + unsigned int HTaps, + unsigned int HTapsChroma, + unsigned int VTaps, + unsigned int VTapsChroma, + + /* output */ + double *PSCL_THROUGHPUT, + double *PSCL_THROUGHPUT_CHROMA, + double *DPPCLKUsingSingleDPP) +{ + double DPPCLKUsingSingleDPPLuma; + double DPPCLKUsingSingleDPPChroma; + + if (HRatio > 1) { + *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatio / + dml_ceil((double) HTaps / 6.0, 1.0)); + } else { + *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput); + } + + DPPCLKUsingSingleDPPLuma = PixelClock * dml_max3(VTaps / 6 * dml_min(1, HRatio), HRatio * VRatio / + *PSCL_THROUGHPUT, 1); + + if ((HTaps > 6 || VTaps > 6) && DPPCLKUsingSingleDPPLuma < 2 * PixelClock) + DPPCLKUsingSingleDPPLuma = 2 * PixelClock; + + if ((SourcePixelFormat != dm_420_8 && SourcePixelFormat != dm_420_10 && SourcePixelFormat != dm_420_12 && + SourcePixelFormat != dm_rgbe_alpha)) { + *PSCL_THROUGHPUT_CHROMA = 0; + *DPPCLKUsingSingleDPP = DPPCLKUsingSingleDPPLuma; + } else { + if (HRatioChroma > 1) { + *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * + HRatioChroma / dml_ceil((double) HTapsChroma / 6.0, 1.0)); + } else { + *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput); + } + DPPCLKUsingSingleDPPChroma = PixelClock * dml_max3(VTapsChroma / 6 * dml_min(1, HRatioChroma), + HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, 1); + if ((HTapsChroma > 6 || VTapsChroma > 6) && DPPCLKUsingSingleDPPChroma < 2 * PixelClock) + DPPCLKUsingSingleDPPChroma = 2 * PixelClock; + *DPPCLKUsingSingleDPP = dml_max(DPPCLKUsingSingleDPPLuma, DPPCLKUsingSingleDPPChroma); + } +} + +void dml32_CalculateBytePerPixelAndBlockSizes( + enum source_format_class SourcePixelFormat, + enum dm_swizzle_mode SurfaceTiling, + + /* Output */ + unsigned int *BytePerPixelY, + unsigned int *BytePerPixelC, + double *BytePerPixelDETY, + double *BytePerPixelDETC, + unsigned int *BlockHeight256BytesY, + unsigned int *BlockHeight256BytesC, + unsigned int *BlockWidth256BytesY, + unsigned int *BlockWidth256BytesC, + unsigned int *MacroTileHeightY, + unsigned int *MacroTileHeightC, + unsigned int *MacroTileWidthY, + unsigned int *MacroTileWidthC) +{ + if (SourcePixelFormat == dm_444_64) { + *BytePerPixelDETY = 8; + *BytePerPixelDETC = 0; + *BytePerPixelY = 8; + *BytePerPixelC = 0; + } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) { + *BytePerPixelDETY = 4; + *BytePerPixelDETC = 0; + *BytePerPixelY = 4; + *BytePerPixelC = 0; + } else if (SourcePixelFormat == dm_444_16) { + *BytePerPixelDETY = 2; + *BytePerPixelDETC = 0; + *BytePerPixelY = 2; + *BytePerPixelC = 0; + } else if (SourcePixelFormat == dm_444_8) { + *BytePerPixelDETY = 1; + *BytePerPixelDETC = 0; + *BytePerPixelY = 1; + *BytePerPixelC = 0; + } else if (SourcePixelFormat == dm_rgbe_alpha) { + *BytePerPixelDETY = 4; + *BytePerPixelDETC = 1; + *BytePerPixelY = 4; + *BytePerPixelC = 1; + } else if (SourcePixelFormat == dm_420_8) { + *BytePerPixelDETY = 1; + *BytePerPixelDETC = 2; + *BytePerPixelY = 1; + *BytePerPixelC = 2; + } else if (SourcePixelFormat == dm_420_12) { + *BytePerPixelDETY = 2; + *BytePerPixelDETC = 4; + *BytePerPixelY = 2; + *BytePerPixelC = 4; + } else { + *BytePerPixelDETY = 4.0 / 3; + *BytePerPixelDETC = 8.0 / 3; + *BytePerPixelY = 2; + *BytePerPixelC = 4; + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: SourcePixelFormat = %d\n", __func__, SourcePixelFormat); + dml_print("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY); + dml_print("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC); + dml_print("DML::%s: BytePerPixelY = %d\n", __func__, *BytePerPixelY); + dml_print("DML::%s: BytePerPixelC = %d\n", __func__, *BytePerPixelC); +#endif + if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32 + || SourcePixelFormat == dm_444_16 + || SourcePixelFormat == dm_444_8 + || SourcePixelFormat == dm_mono_16 + || SourcePixelFormat == dm_mono_8 + || SourcePixelFormat == dm_rgbe)) { + if (SurfaceTiling == dm_sw_linear) + *BlockHeight256BytesY = 1; + else if (SourcePixelFormat == dm_444_64) + *BlockHeight256BytesY = 4; + else if (SourcePixelFormat == dm_444_8) + *BlockHeight256BytesY = 16; + else + *BlockHeight256BytesY = 8; + + *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY; + *BlockHeight256BytesC = 0; + *BlockWidth256BytesC = 0; + } else { + if (SurfaceTiling == dm_sw_linear) { + *BlockHeight256BytesY = 1; + *BlockHeight256BytesC = 1; + } else if (SourcePixelFormat == dm_rgbe_alpha) { + *BlockHeight256BytesY = 8; + *BlockHeight256BytesC = 16; + } else if (SourcePixelFormat == dm_420_8) { + *BlockHeight256BytesY = 16; + *BlockHeight256BytesC = 8; + } else { + *BlockHeight256BytesY = 8; + *BlockHeight256BytesC = 8; + } + *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY; + *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC; + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: BlockWidth256BytesY = %d\n", __func__, *BlockWidth256BytesY); + dml_print("DML::%s: BlockHeight256BytesY = %d\n", __func__, *BlockHeight256BytesY); + dml_print("DML::%s: BlockWidth256BytesC = %d\n", __func__, *BlockWidth256BytesC); + dml_print("DML::%s: BlockHeight256BytesC = %d\n", __func__, *BlockHeight256BytesC); +#endif + + if (SurfaceTiling == dm_sw_linear) { + *MacroTileHeightY = *BlockHeight256BytesY; + *MacroTileWidthY = 256 / *BytePerPixelY / *MacroTileHeightY; + *MacroTileHeightC = *BlockHeight256BytesC; + if (*MacroTileHeightC == 0) + *MacroTileWidthC = 0; + else + *MacroTileWidthC = 256 / *BytePerPixelC / *MacroTileHeightC; + } else if (SurfaceTiling == dm_sw_64kb_d || SurfaceTiling == dm_sw_64kb_d_t || + SurfaceTiling == dm_sw_64kb_d_x || SurfaceTiling == dm_sw_64kb_r_x) { + *MacroTileHeightY = 16 * *BlockHeight256BytesY; + *MacroTileWidthY = 65536 / *BytePerPixelY / *MacroTileHeightY; + *MacroTileHeightC = 16 * *BlockHeight256BytesC; + if (*MacroTileHeightC == 0) + *MacroTileWidthC = 0; + else + *MacroTileWidthC = 65536 / *BytePerPixelC / *MacroTileHeightC; + } else { + *MacroTileHeightY = 32 * *BlockHeight256BytesY; + *MacroTileWidthY = 65536 * 4 / *BytePerPixelY / *MacroTileHeightY; + *MacroTileHeightC = 32 * *BlockHeight256BytesC; + if (*MacroTileHeightC == 0) + *MacroTileWidthC = 0; + else + *MacroTileWidthC = 65536 * 4 / *BytePerPixelC / *MacroTileHeightC; + } + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: MacroTileWidthY = %d\n", __func__, *MacroTileWidthY); + dml_print("DML::%s: MacroTileHeightY = %d\n", __func__, *MacroTileHeightY); + dml_print("DML::%s: MacroTileWidthC = %d\n", __func__, *MacroTileWidthC); + dml_print("DML::%s: MacroTileHeightC = %d\n", __func__, *MacroTileHeightC); +#endif +} // CalculateBytePerPixelAndBlockSizes + +void dml32_CalculateSwathAndDETConfiguration( + unsigned int DETSizeOverride[], + enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], + unsigned int ConfigReturnBufferSizeInKByte, + unsigned int MaxTotalDETInKByte, + unsigned int MinCompressedBufferSizeInKByte, + double ForceSingleDPP, + unsigned int NumberOfActiveSurfaces, + unsigned int nomDETInKByte, + enum unbounded_requesting_policy UseUnboundedRequestingFinal, + bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment, + unsigned int PixelChunkSizeKBytes, + unsigned int ROBSizeKBytes, + unsigned int CompressedBufferSegmentSizeInkByteFinal, + enum output_encoder_class Output[], + double ReadBandwidthLuma[], + double ReadBandwidthChroma[], + double MaximumSwathWidthLuma[], + double MaximumSwathWidthChroma[], + enum dm_rotation_angle SourceRotation[], + bool ViewportStationary[], + enum source_format_class SourcePixelFormat[], + enum dm_swizzle_mode SurfaceTiling[], + unsigned int ViewportWidth[], + unsigned int ViewportHeight[], + unsigned int ViewportXStart[], + unsigned int ViewportYStart[], + unsigned int ViewportXStartC[], + unsigned int ViewportYStartC[], + unsigned int SurfaceWidthY[], + unsigned int SurfaceWidthC[], + unsigned int SurfaceHeightY[], + unsigned int SurfaceHeightC[], + unsigned int Read256BytesBlockHeightY[], + unsigned int Read256BytesBlockHeightC[], + unsigned int Read256BytesBlockWidthY[], + unsigned int Read256BytesBlockWidthC[], + enum odm_combine_mode ODMMode[], + unsigned int BlendingAndTiming[], + unsigned int BytePerPixY[], + unsigned int BytePerPixC[], + double BytePerPixDETY[], + double BytePerPixDETC[], + unsigned int HActive[], + double HRatio[], + double HRatioChroma[], + unsigned int DPPPerSurface[], + + /* Output */ + unsigned int swath_width_luma_ub[], + unsigned int swath_width_chroma_ub[], + double SwathWidth[], + double SwathWidthChroma[], + unsigned int SwathHeightY[], + unsigned int SwathHeightC[], + unsigned int DETBufferSizeInKByte[], + unsigned int DETBufferSizeY[], + unsigned int DETBufferSizeC[], + bool *UnboundedRequestEnabled, + unsigned int *CompressedBufferSizeInkByte, + unsigned int *CompBufReservedSpaceKBytes, + bool *CompBufReservedSpaceNeedAdjustment, + bool ViewportSizeSupportPerSurface[], + bool *ViewportSizeSupport) +{ + unsigned int MaximumSwathHeightY[DC__NUM_DPP__MAX]; + unsigned int MaximumSwathHeightC[DC__NUM_DPP__MAX]; + unsigned int RoundedUpMaxSwathSizeBytesY[DC__NUM_DPP__MAX]; + unsigned int RoundedUpMaxSwathSizeBytesC[DC__NUM_DPP__MAX]; + unsigned int RoundedUpSwathSizeBytesY; + unsigned int RoundedUpSwathSizeBytesC; + double SwathWidthdoubleDPP[DC__NUM_DPP__MAX]; + double SwathWidthdoubleDPPChroma[DC__NUM_DPP__MAX]; + unsigned int k; + unsigned int TotalActiveDPP = 0; + bool NoChromaSurfaces = true; + unsigned int DETBufferSizeInKByteForSwathCalculation; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP); + dml_print("DML::%s: ROBSizeKBytes = %d\n", __func__, ROBSizeKBytes); + dml_print("DML::%s: PixelChunkSizeKBytes = %d\n", __func__, PixelChunkSizeKBytes); +#endif + dml32_CalculateSwathWidth(ForceSingleDPP, + NumberOfActiveSurfaces, + SourcePixelFormat, + SourceRotation, + ViewportStationary, + ViewportWidth, + ViewportHeight, + ViewportXStart, + ViewportYStart, + ViewportXStartC, + ViewportYStartC, + SurfaceWidthY, + SurfaceWidthC, + SurfaceHeightY, + SurfaceHeightC, + ODMMode, + BytePerPixY, + BytePerPixC, + Read256BytesBlockHeightY, + Read256BytesBlockHeightC, + Read256BytesBlockWidthY, + Read256BytesBlockWidthC, + BlendingAndTiming, + HActive, + HRatio, + DPPPerSurface, + + /* Output */ + SwathWidthdoubleDPP, + SwathWidthdoubleDPPChroma, + SwathWidth, + SwathWidthChroma, + MaximumSwathHeightY, + MaximumSwathHeightC, + swath_width_luma_ub, + swath_width_chroma_ub); + + for (k = 0; k < NumberOfActiveSurfaces; ++k) { + RoundedUpMaxSwathSizeBytesY[k] = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k]; + RoundedUpMaxSwathSizeBytesC[k] = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k]; +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%0d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]); + dml_print("DML::%s: k=%0d swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]); + dml_print("DML::%s: k=%0d BytePerPixDETY = %f\n", __func__, k, BytePerPixDETY[k]); + dml_print("DML::%s: k=%0d MaximumSwathHeightY = %d\n", __func__, k, MaximumSwathHeightY[k]); + dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k, + RoundedUpMaxSwathSizeBytesY[k]); + dml_print("DML::%s: k=%0d swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]); + dml_print("DML::%s: k=%0d BytePerPixDETC = %f\n", __func__, k, BytePerPixDETC[k]); + dml_print("DML::%s: k=%0d MaximumSwathHeightC = %d\n", __func__, k, MaximumSwathHeightC[k]); + dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k, + RoundedUpMaxSwathSizeBytesC[k]); +#endif + + if (SourcePixelFormat[k] == dm_420_10) { + RoundedUpMaxSwathSizeBytesY[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesY[k], 256); + RoundedUpMaxSwathSizeBytesC[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesC[k], 256); + } + } + + for (k = 0; k < NumberOfActiveSurfaces; ++k) { + TotalActiveDPP = TotalActiveDPP + (ForceSingleDPP ? 1 : DPPPerSurface[k]); + if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || + SourcePixelFormat[k] == dm_420_12 || SourcePixelFormat[k] == dm_rgbe_alpha) { + NoChromaSurfaces = false; + } + } + + // By default, just set the reserved space to 2 pixel chunks size + *CompBufReservedSpaceKBytes = PixelChunkSizeKBytes * 2; + + // if unbounded req is enabled, program reserved space such that the ROB will not hold more than 8 swaths worth of data + // - assume worst-case compression rate of 4. [ROB size - 8 * swath_size / max_compression ratio] + // - assume for "narrow" vp case in which the ROB can fit 8 swaths, the DET should be big enough to do full size req + *CompBufReservedSpaceNeedAdjustment = ((int) ROBSizeKBytes - (int) *CompBufReservedSpaceKBytes) > (int) (RoundedUpMaxSwathSizeBytesY[0]/512); + + if (*CompBufReservedSpaceNeedAdjustment == 1) { + *CompBufReservedSpaceKBytes = ROBSizeKBytes - RoundedUpMaxSwathSizeBytesY[0]/512; + } + + #ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: CompBufReservedSpaceKBytes = %d\n", __func__, *CompBufReservedSpaceKBytes); + dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment = %d\n", __func__, *CompBufReservedSpaceNeedAdjustment); + #endif + + *UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaSurfaces, Output[0], SurfaceTiling[0], *CompBufReservedSpaceNeedAdjustment, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment); + + dml32_CalculateDETBufferSize(DETSizeOverride, + UseMALLForPStateChange, + ForceSingleDPP, + NumberOfActiveSurfaces, + *UnboundedRequestEnabled, + nomDETInKByte, + MaxTotalDETInKByte, + ConfigReturnBufferSizeInKByte, + MinCompressedBufferSizeInKByte, + CompressedBufferSegmentSizeInkByteFinal, + SourcePixelFormat, + ReadBandwidthLuma, + ReadBandwidthChroma, + RoundedUpMaxSwathSizeBytesY, + RoundedUpMaxSwathSizeBytesC, + DPPPerSurface, + + /* Output */ + DETBufferSizeInKByte, // per hubp pipe + CompressedBufferSizeInkByte); + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP); + dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte); + dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte); + dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal); + dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled); + dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte); +#endif + + *ViewportSizeSupport = true; + for (k = 0; k < NumberOfActiveSurfaces; ++k) { + + DETBufferSizeInKByteForSwathCalculation = (UseMALLForPStateChange[k] == + dm_use_mall_pstate_change_phantom_pipe ? 1024 : DETBufferSizeInKByte[k]); +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%0d DETBufferSizeInKByteForSwathCalculation = %d\n", __func__, k, + DETBufferSizeInKByteForSwathCalculation); +#endif + + if (RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] <= + DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { + SwathHeightY[k] = MaximumSwathHeightY[k]; + SwathHeightC[k] = MaximumSwathHeightC[k]; + RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k]; + RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k]; + } else if (RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * RoundedUpMaxSwathSizeBytesC[k] && + RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] <= + DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { + SwathHeightY[k] = MaximumSwathHeightY[k] / 2; + SwathHeightC[k] = MaximumSwathHeightC[k]; + RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2; + RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k]; + } else if (RoundedUpMaxSwathSizeBytesY[k] < 1.5 * RoundedUpMaxSwathSizeBytesC[k] && + RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] / 2 <= + DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { + SwathHeightY[k] = MaximumSwathHeightY[k]; + SwathHeightC[k] = MaximumSwathHeightC[k] / 2; + RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k]; + RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2; + } else { + SwathHeightY[k] = MaximumSwathHeightY[k] / 2; + SwathHeightC[k] = MaximumSwathHeightC[k] / 2; + RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2; + RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2; + } + + if ((RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] / 2 > + DETBufferSizeInKByteForSwathCalculation * 1024 / 2) + || SwathWidth[k] > MaximumSwathWidthLuma[k] || (SwathHeightC[k] > 0 && + SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) { + *ViewportSizeSupport = false; + ViewportSizeSupportPerSurface[k] = false; + } else { + ViewportSizeSupportPerSurface[k] = true; + } + + if (SwathHeightC[k] == 0) { +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%0d All DET for plane0\n", __func__, k); +#endif + DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024; + DETBufferSizeC[k] = 0; + } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) { +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%0d Half DET for plane0, half for plane1\n", __func__, k); +#endif + DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024 / 2; + DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 / 2; + } else { +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%0d 2/3 DET for plane0, 1/3 for plane1\n", __func__, k); +#endif + DETBufferSizeY[k] = dml_floor(DETBufferSizeInKByte[k] * 1024 * 2 / 3, 1024); + DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 - DETBufferSizeY[k]; + } + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%0d SwathHeightY = %d\n", __func__, k, SwathHeightY[k]); + dml_print("DML::%s: k=%0d SwathHeightC = %d\n", __func__, k, SwathHeightC[k]); + dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, + k, RoundedUpMaxSwathSizeBytesY[k]); + dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, + k, RoundedUpMaxSwathSizeBytesC[k]); + dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesY = %d\n", __func__, k, RoundedUpSwathSizeBytesY); + dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesC = %d\n", __func__, k, RoundedUpSwathSizeBytesC); + dml_print("DML::%s: k=%0d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]); + dml_print("DML::%s: k=%0d DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]); + dml_print("DML::%s: k=%0d DETBufferSizeC = %d\n", __func__, k, DETBufferSizeC[k]); + dml_print("DML::%s: k=%0d ViewportSizeSupportPerSurface = %d\n", __func__, k, + ViewportSizeSupportPerSurface[k]); +#endif + + } +} // CalculateSwathAndDETConfiguration + +void dml32_CalculateSwathWidth( + bool ForceSingleDPP, + unsigned int NumberOfActiveSurfaces, + enum source_format_class SourcePixelFormat[], + enum dm_rotation_angle SourceRotation[], + bool ViewportStationary[], + unsigned int ViewportWidth[], + unsigned int ViewportHeight[], + unsigned int ViewportXStart[], + unsigned int ViewportYStart[], + unsigned int ViewportXStartC[], + unsigned int ViewportYStartC[], + unsigned int SurfaceWidthY[], + unsigned int SurfaceWidthC[], + unsigned int SurfaceHeightY[], + unsigned int SurfaceHeightC[], + enum odm_combine_mode ODMMode[], + unsigned int BytePerPixY[], + unsigned int BytePerPixC[], + unsigned int Read256BytesBlockHeightY[], + unsigned int Read256BytesBlockHeightC[], + unsigned int Read256BytesBlockWidthY[], + unsigned int Read256BytesBlockWidthC[], + unsigned int BlendingAndTiming[], + unsigned int HActive[], + double HRatio[], + unsigned int DPPPerSurface[], + + /* Output */ + double SwathWidthdoubleDPPY[], + double SwathWidthdoubleDPPC[], + double SwathWidthY[], // per-pipe + double SwathWidthC[], // per-pipe + unsigned int MaximumSwathHeightY[], + unsigned int MaximumSwathHeightC[], + unsigned int swath_width_luma_ub[], // per-pipe + unsigned int swath_width_chroma_ub[]) // per-pipe +{ + unsigned int k, j; + enum odm_combine_mode MainSurfaceODMMode; + + unsigned int surface_width_ub_l; + unsigned int surface_height_ub_l; + unsigned int surface_width_ub_c = 0; + unsigned int surface_height_ub_c = 0; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP); + dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces); +#endif + + for (k = 0; k < NumberOfActiveSurfaces; ++k) { + if (!IsVertical(SourceRotation[k])) + SwathWidthdoubleDPPY[k] = ViewportWidth[k]; + else + SwathWidthdoubleDPPY[k] = ViewportHeight[k]; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]); + dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]); +#endif + + MainSurfaceODMMode = ODMMode[k]; + for (j = 0; j < NumberOfActiveSurfaces; ++j) { + if (BlendingAndTiming[k] == j) + MainSurfaceODMMode = ODMMode[j]; + } + + if (ForceSingleDPP) { + SwathWidthY[k] = SwathWidthdoubleDPPY[k]; + } else { + if (MainSurfaceODMMode == dm_odm_combine_mode_4to1) { + SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k], + dml_round(HActive[k] / 4.0 * HRatio[k])); + } else if (MainSurfaceODMMode == dm_odm_combine_mode_2to1) { + SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k], + dml_round(HActive[k] / 2.0 * HRatio[k])); + } else if (DPPPerSurface[k] == 2) { + SwathWidthY[k] = SwathWidthdoubleDPPY[k] / 2; + } else { + SwathWidthY[k] = SwathWidthdoubleDPPY[k]; + } + } + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%d HActive=%d\n", __func__, k, HActive[k]); + dml_print("DML::%s: k=%d HRatio=%f\n", __func__, k, HRatio[k]); + dml_print("DML::%s: k=%d MainSurfaceODMMode=%d\n", __func__, k, MainSurfaceODMMode); + dml_print("DML::%s: k=%d SwathWidthdoubleDPPY=%d\n", __func__, k, SwathWidthdoubleDPPY[k]); + dml_print("DML::%s: k=%d SwathWidthY=%d\n", __func__, k, SwathWidthY[k]); +#endif + + if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || + SourcePixelFormat[k] == dm_420_12) { + SwathWidthC[k] = SwathWidthY[k] / 2; + SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k] / 2; + } else { + SwathWidthC[k] = SwathWidthY[k]; + SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k]; + } + + if (ForceSingleDPP == true) { + SwathWidthY[k] = SwathWidthdoubleDPPY[k]; + SwathWidthC[k] = SwathWidthdoubleDPPC[k]; + } + + surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]); + surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]); + + if (!IsVertical(SourceRotation[k])) { + MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k]; + MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k]; + if (ViewportStationary[k] && DPPPerSurface[k] == 1) { + swath_width_luma_ub[k] = dml_min(surface_width_ub_l, + dml_floor(ViewportXStart[k] + + SwathWidthY[k] + + Read256BytesBlockWidthY[k] - 1, + Read256BytesBlockWidthY[k]) - + dml_floor(ViewportXStart[k], + Read256BytesBlockWidthY[k])); + } else { + swath_width_luma_ub[k] = dml_min(surface_width_ub_l, + dml_ceil(SwathWidthY[k] - 1, + Read256BytesBlockWidthY[k]) + + Read256BytesBlockWidthY[k]); + } + if (BytePerPixC[k] > 0) { + surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]); + if (ViewportStationary[k] && DPPPerSurface[k] == 1) { + swath_width_chroma_ub[k] = dml_min(surface_width_ub_c, + dml_floor(ViewportXStartC[k] + SwathWidthC[k] + + Read256BytesBlockWidthC[k] - 1, + Read256BytesBlockWidthC[k]) - + dml_floor(ViewportXStartC[k], + Read256BytesBlockWidthC[k])); + } else { + swath_width_chroma_ub[k] = dml_min(surface_width_ub_c, + dml_ceil(SwathWidthC[k] - 1, + Read256BytesBlockWidthC[k]) + + Read256BytesBlockWidthC[k]); + } + } else { + swath_width_chroma_ub[k] = 0; + } + } else { + MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k]; + MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k]; + + if (ViewportStationary[k] && DPPPerSurface[k] == 1) { + swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_floor(ViewportYStart[k] + + SwathWidthY[k] + Read256BytesBlockHeightY[k] - 1, + Read256BytesBlockHeightY[k]) - + dml_floor(ViewportYStart[k], Read256BytesBlockHeightY[k])); + } else { + swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_ceil(SwathWidthY[k] - 1, + Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]); + } + if (BytePerPixC[k] > 0) { + surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]); + if (ViewportStationary[k] && DPPPerSurface[k] == 1) { + swath_width_chroma_ub[k] = dml_min(surface_height_ub_c, + dml_floor(ViewportYStartC[k] + SwathWidthC[k] + + Read256BytesBlockHeightC[k] - 1, + Read256BytesBlockHeightC[k]) - + dml_floor(ViewportYStartC[k], + Read256BytesBlockHeightC[k])); + } else { + swath_width_chroma_ub[k] = dml_min(surface_height_ub_c, + dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + + Read256BytesBlockHeightC[k]); + } + } else { + swath_width_chroma_ub[k] = 0; + } + } + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l); + dml_print("DML::%s: k=%d surface_height_ub_l=%0d\n", __func__, k, surface_height_ub_l); + dml_print("DML::%s: k=%d surface_width_ub_c=%0d\n", __func__, k, surface_width_ub_c); + dml_print("DML::%s: k=%d surface_height_ub_c=%0d\n", __func__, k, surface_height_ub_c); + dml_print("DML::%s: k=%d Read256BytesBlockWidthY=%0d\n", __func__, k, Read256BytesBlockWidthY[k]); + dml_print("DML::%s: k=%d Read256BytesBlockHeightY=%0d\n", __func__, k, Read256BytesBlockHeightY[k]); + dml_print("DML::%s: k=%d Read256BytesBlockWidthC=%0d\n", __func__, k, Read256BytesBlockWidthC[k]); + dml_print("DML::%s: k=%d Read256BytesBlockHeightC=%0d\n", __func__, k, Read256BytesBlockHeightC[k]); + dml_print("DML::%s: k=%d ViewportStationary=%0d\n", __func__, k, ViewportStationary[k]); + dml_print("DML::%s: k=%d DPPPerSurface=%0d\n", __func__, k, DPPPerSurface[k]); + dml_print("DML::%s: k=%d swath_width_luma_ub=%0d\n", __func__, k, swath_width_luma_ub[k]); + dml_print("DML::%s: k=%d swath_width_chroma_ub=%0d\n", __func__, k, swath_width_chroma_ub[k]); + dml_print("DML::%s: k=%d MaximumSwathHeightY=%0d\n", __func__, k, MaximumSwathHeightY[k]); + dml_print("DML::%s: k=%d MaximumSwathHeightC=%0d\n", __func__, k, MaximumSwathHeightC[k]); +#endif + + } +} // CalculateSwathWidth + +bool dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, + unsigned int TotalNumberOfActiveDPP, + bool NoChroma, + enum output_encoder_class Output, + enum dm_swizzle_mode SurfaceTiling, + bool CompBufReservedSpaceNeedAdjustment, + bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment) +{ + bool ret_val = false; + + ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable && + TotalNumberOfActiveDPP == 1 && NoChroma); + if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp) + ret_val = false; + + if (SurfaceTiling == dm_sw_linear) + ret_val = false; + + if (CompBufReservedSpaceNeedAdjustment == 1 && DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment) + ret_val = false; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment = %d\n", __func__, CompBufReservedSpaceNeedAdjustment); + dml_print("DML::%s: DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment = %d\n", __func__, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment); + dml_print("DML::%s: ret_val = %d\n", __func__, ret_val); +#endif + + return (ret_val); +} + +void dml32_CalculateDETBufferSize( + unsigned int DETSizeOverride[], + enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], + bool ForceSingleDPP, + unsigned int NumberOfActiveSurfaces, + bool UnboundedRequestEnabled, + unsigned int nomDETInKByte, + unsigned int MaxTotalDETInKByte, + unsigned int ConfigReturnBufferSizeInKByte, + unsigned int MinCompressedBufferSizeInKByte, + unsigned int CompressedBufferSegmentSizeInkByteFinal, + enum source_format_class SourcePixelFormat[], + double ReadBandwidthLuma[], + double ReadBandwidthChroma[], + unsigned int RoundedUpMaxSwathSizeBytesY[], + unsigned int RoundedUpMaxSwathSizeBytesC[], + unsigned int DPPPerSurface[], + /* Output */ + unsigned int DETBufferSizeInKByte[], + unsigned int *CompressedBufferSizeInkByte) +{ + unsigned int DETBufferSizePoolInKByte; + unsigned int NextDETBufferPieceInKByte; + bool DETPieceAssignedToThisSurfaceAlready[DC__NUM_DPP__MAX]; + bool NextPotentialSurfaceToAssignDETPieceFound; + unsigned int NextSurfaceToAssignDETPiece; + double TotalBandwidth; + double BandwidthOfSurfacesNotAssignedDETPiece; + unsigned int max_minDET; + unsigned int minDET; + unsigned int minDET_pipe; + unsigned int j, k; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP); + dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte); + dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces); + dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled); + dml_print("DML::%s: MaxTotalDETInKByte = %d\n", __func__, MaxTotalDETInKByte); + dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte); + dml_print("DML::%s: MinCompressedBufferSizeInKByte = %d\n", __func__, MinCompressedBufferSizeInKByte); + dml_print("DML::%s: CompressedBufferSegmentSizeInkByteFinal = %d\n", __func__, + CompressedBufferSegmentSizeInkByteFinal); +#endif + + // Note: Will use default det size if that fits 2 swaths + if (UnboundedRequestEnabled) { + if (DETSizeOverride[0] > 0) { + DETBufferSizeInKByte[0] = DETSizeOverride[0]; + } else { + DETBufferSizeInKByte[0] = dml_max(nomDETInKByte, dml_ceil(2.0 * + ((double) RoundedUpMaxSwathSizeBytesY[0] + + (double) RoundedUpMaxSwathSizeBytesC[0]) / 1024.0, 64.0)); + } + *CompressedBufferSizeInkByte = ConfigReturnBufferSizeInKByte - DETBufferSizeInKByte[0]; + } else { + DETBufferSizePoolInKByte = MaxTotalDETInKByte; + for (k = 0; k < NumberOfActiveSurfaces; ++k) { + DETBufferSizeInKByte[k] = nomDETInKByte; + if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || + SourcePixelFormat[k] == dm_420_12) { + max_minDET = nomDETInKByte - 64; + } else { + max_minDET = nomDETInKByte; + } + minDET = 128; + minDET_pipe = 0; + + // add DET resource until can hold 2 full swaths + while (minDET <= max_minDET && minDET_pipe == 0) { + if (2.0 * ((double) RoundedUpMaxSwathSizeBytesY[k] + + (double) RoundedUpMaxSwathSizeBytesC[k]) / 1024.0 <= minDET) + minDET_pipe = minDET; + minDET = minDET + 64; + } + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%0d minDET = %d\n", __func__, k, minDET); + dml_print("DML::%s: k=%0d max_minDET = %d\n", __func__, k, max_minDET); + dml_print("DML::%s: k=%0d minDET_pipe = %d\n", __func__, k, minDET_pipe); + dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k, + RoundedUpMaxSwathSizeBytesY[k]); + dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k, + RoundedUpMaxSwathSizeBytesC[k]); +#endif + + if (minDET_pipe == 0) { + minDET_pipe = dml_max(128, dml_ceil(((double)RoundedUpMaxSwathSizeBytesY[k] + + (double)RoundedUpMaxSwathSizeBytesC[k]) / 1024.0, 64)); +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%0d minDET_pipe = %d (assume each plane take half DET)\n", + __func__, k, minDET_pipe); +#endif + } + + if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) { + DETBufferSizeInKByte[k] = 0; + } else if (DETSizeOverride[k] > 0) { + DETBufferSizeInKByte[k] = DETSizeOverride[k]; + DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - + (ForceSingleDPP ? 1 : DPPPerSurface[k]) * DETSizeOverride[k]; + } else if ((ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe <= DETBufferSizePoolInKByte) { + DETBufferSizeInKByte[k] = minDET_pipe; + DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - + (ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe; + } + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]); + dml_print("DML::%s: k=%d DETSizeOverride = %d\n", __func__, k, DETSizeOverride[k]); + dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]); + dml_print("DML::%s: DETBufferSizePoolInKByte = %d\n", __func__, DETBufferSizePoolInKByte); +#endif + } + + TotalBandwidth = 0; + for (k = 0; k < NumberOfActiveSurfaces; ++k) { + if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) + TotalBandwidth = TotalBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k]; + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: --- Before bandwidth adjustment ---\n", __func__); + for (uint k = 0; k < NumberOfActiveSurfaces; ++k) + dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]); + dml_print("DML::%s: --- DET allocation with bandwidth ---\n", __func__); + dml_print("DML::%s: TotalBandwidth = %f\n", __func__, TotalBandwidth); +#endif + BandwidthOfSurfacesNotAssignedDETPiece = TotalBandwidth; + for (k = 0; k < NumberOfActiveSurfaces; ++k) { + + if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) { + DETPieceAssignedToThisSurfaceAlready[k] = true; + } else if (DETSizeOverride[k] > 0 || (((double) (ForceSingleDPP ? 1 : DPPPerSurface[k]) * + (double) DETBufferSizeInKByte[k] / (double) MaxTotalDETInKByte) >= + ((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / TotalBandwidth))) { + DETPieceAssignedToThisSurfaceAlready[k] = true; + BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece - + ReadBandwidthLuma[k] - ReadBandwidthChroma[k]; + } else { + DETPieceAssignedToThisSurfaceAlready[k] = false; + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%d DETPieceAssignedToThisSurfaceAlready = %d\n", __func__, k, + DETPieceAssignedToThisSurfaceAlready[k]); + dml_print("DML::%s: k=%d BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k, + BandwidthOfSurfacesNotAssignedDETPiece); +#endif + } + + for (j = 0; j < NumberOfActiveSurfaces; ++j) { + NextPotentialSurfaceToAssignDETPieceFound = false; + NextSurfaceToAssignDETPiece = 0; + + for (k = 0; k < NumberOfActiveSurfaces; ++k) { +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[k] = %f\n", __func__, j, k, + ReadBandwidthLuma[k]); + dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[k] = %f\n", __func__, j, k, + ReadBandwidthChroma[k]); + dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[Next] = %f\n", __func__, j, k, + ReadBandwidthLuma[NextSurfaceToAssignDETPiece]); + dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[Next] = %f\n", __func__, j, k, + ReadBandwidthChroma[NextSurfaceToAssignDETPiece]); + dml_print("DML::%s: j=%d k=%d, NextSurfaceToAssignDETPiece = %d\n", __func__, j, k, + NextSurfaceToAssignDETPiece); +#endif + if (!DETPieceAssignedToThisSurfaceAlready[k] && + (!NextPotentialSurfaceToAssignDETPieceFound || + ReadBandwidthLuma[k] + ReadBandwidthChroma[k] < + ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + + ReadBandwidthChroma[NextSurfaceToAssignDETPiece])) { + NextSurfaceToAssignDETPiece = k; + NextPotentialSurfaceToAssignDETPieceFound = true; + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: j=%d k=%d, DETPieceAssignedToThisSurfaceAlready = %d\n", + __func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]); + dml_print("DML::%s: j=%d k=%d, NextPotentialSurfaceToAssignDETPieceFound = %d\n", + __func__, j, k, NextPotentialSurfaceToAssignDETPieceFound); +#endif + } + + if (NextPotentialSurfaceToAssignDETPieceFound) { + // Note: To show the banker's rounding behavior in VBA and also the fact + // that the DET buffer size varies due to precision issue + // + //double tmp1 = ((double) DETBufferSizePoolInKByte * + // (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + + // ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) / + // BandwidthOfSurfacesNotAssignedDETPiece / + // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)); + //double tmp2 = dml_round((double) DETBufferSizePoolInKByte * + // (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + + // ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) / + //BandwidthOfSurfacesNotAssignedDETPiece / + // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)); + // + //dml_print("DML::%s: j=%d, tmp1 = %f\n", __func__, j, tmp1); + //dml_print("DML::%s: j=%d, tmp2 = %f\n", __func__, j, tmp2); + + NextDETBufferPieceInKByte = dml_min( + dml_round((double) DETBufferSizePoolInKByte * + (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + + ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) / + BandwidthOfSurfacesNotAssignedDETPiece / + ((ForceSingleDPP ? 1 : + DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)) * + (ForceSingleDPP ? 1 : + DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0, + dml_floor((double) DETBufferSizePoolInKByte, + (ForceSingleDPP ? 1 : + DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)); + + // Above calculation can assign the entire DET buffer allocation to a single pipe. + // We should limit the per-pipe DET size to the nominal / max per pipe. + if (NextDETBufferPieceInKByte > nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) { + if (DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] < + nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) { + NextDETBufferPieceInKByte = nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k]) - + DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]; + } else { + // Case where DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] + // already has the max per-pipe value + NextDETBufferPieceInKByte = 0; + } + } + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: j=%0d, DETBufferSizePoolInKByte = %d\n", __func__, j, + DETBufferSizePoolInKByte); + dml_print("DML::%s: j=%0d, NextSurfaceToAssignDETPiece = %d\n", __func__, j, + NextSurfaceToAssignDETPiece); + dml_print("DML::%s: j=%0d, ReadBandwidthLuma[%0d] = %f\n", __func__, j, + NextSurfaceToAssignDETPiece, ReadBandwidthLuma[NextSurfaceToAssignDETPiece]); + dml_print("DML::%s: j=%0d, ReadBandwidthChroma[%0d] = %f\n", __func__, j, + NextSurfaceToAssignDETPiece, ReadBandwidthChroma[NextSurfaceToAssignDETPiece]); + dml_print("DML::%s: j=%0d, BandwidthOfSurfacesNotAssignedDETPiece = %f\n", + __func__, j, BandwidthOfSurfacesNotAssignedDETPiece); + dml_print("DML::%s: j=%0d, NextDETBufferPieceInKByte = %d\n", __func__, j, + NextDETBufferPieceInKByte); + dml_print("DML::%s: j=%0d, DETBufferSizeInKByte[%0d] increases from %0d ", + __func__, j, NextSurfaceToAssignDETPiece, + DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]); +#endif + + DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] = + DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] + + NextDETBufferPieceInKByte + / (ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]); +#ifdef __DML_VBA_DEBUG__ + dml_print("to %0d\n", DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]); +#endif + + DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - NextDETBufferPieceInKByte; + DETPieceAssignedToThisSurfaceAlready[NextSurfaceToAssignDETPiece] = true; + BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece - + (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + + ReadBandwidthChroma[NextSurfaceToAssignDETPiece]); + } + } + *CompressedBufferSizeInkByte = MinCompressedBufferSizeInKByte; + } + *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: --- After bandwidth adjustment ---\n", __func__); + dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte); + for (uint k = 0; k < NumberOfActiveSurfaces; ++k) { + dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d (TotalReadBandWidth=%f)\n", + __func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]); + } +#endif +} // CalculateDETBufferSize + +void dml32_CalculateODMMode( + unsigned int MaximumPixelsPerLinePerDSCUnit, + unsigned int HActive, + enum output_format_class OutFormat, + enum output_encoder_class Output, + enum odm_combine_policy ODMUse, + double StateDispclk, + double MaxDispclk, + bool DSCEnable, + unsigned int TotalNumberOfActiveDPP, + unsigned int MaxNumDPP, + double PixelClock, + double DISPCLKDPPCLKDSCCLKDownSpreading, + double DISPCLKRampingMargin, + double DISPCLKDPPCLKVCOSpeed, + unsigned int NumberOfDSCSlices, + + /* Output */ + bool *TotalAvailablePipesSupport, + unsigned int *NumberOfDPP, + enum odm_combine_mode *ODMMode, + double *RequiredDISPCLKPerSurface) +{ + + double SurfaceRequiredDISPCLKWithoutODMCombine; + double SurfaceRequiredDISPCLKWithODMCombineTwoToOne; + double SurfaceRequiredDISPCLKWithODMCombineFourToOne; + + SurfaceRequiredDISPCLKWithoutODMCombine = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_disabled, + PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed, + MaxDispclk); + SurfaceRequiredDISPCLKWithODMCombineTwoToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_2to1, + PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed, + MaxDispclk); + SurfaceRequiredDISPCLKWithODMCombineFourToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_4to1, + PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed, + MaxDispclk); + *TotalAvailablePipesSupport = true; + *ODMMode = dm_odm_combine_mode_disabled; // initialize as disable + + if (ODMUse == dm_odm_combine_policy_none) + *ODMMode = dm_odm_combine_mode_disabled; + + *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithoutODMCombine; + *NumberOfDPP = 0; + + // FIXME check ODMUse == "" condition does it mean bypass or Gabriel means something like don't care?? + // (ODMUse == "" || ODMUse == "CombineAsNeeded") + + if (!(Output == dm_hdmi || Output == dm_dp || Output == dm_edp) && (ODMUse == dm_odm_combine_policy_4to1 || + ((SurfaceRequiredDISPCLKWithODMCombineTwoToOne > StateDispclk || + (DSCEnable && (HActive > 2 * MaximumPixelsPerLinePerDSCUnit)) + || NumberOfDSCSlices > 8)))) { + if (TotalNumberOfActiveDPP + 4 <= MaxNumDPP) { + *ODMMode = dm_odm_combine_mode_4to1; + *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne; + *NumberOfDPP = 4; + } else { + *TotalAvailablePipesSupport = false; + } + } else if (Output != dm_hdmi && (ODMUse == dm_odm_combine_policy_2to1 || + (((SurfaceRequiredDISPCLKWithoutODMCombine > StateDispclk && + SurfaceRequiredDISPCLKWithODMCombineTwoToOne <= StateDispclk) || + (DSCEnable && (HActive > MaximumPixelsPerLinePerDSCUnit)) + || (NumberOfDSCSlices <= 8 && NumberOfDSCSlices > 4))))) { + if (TotalNumberOfActiveDPP + 2 <= MaxNumDPP) { + *ODMMode = dm_odm_combine_mode_2to1; + *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne; + *NumberOfDPP = 2; + } else { + *TotalAvailablePipesSupport = false; + } + } else { + if (TotalNumberOfActiveDPP + 1 <= MaxNumDPP) + *NumberOfDPP = 1; + else + *TotalAvailablePipesSupport = false; + } + if (OutFormat == dm_420 && HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH && + ODMUse != dm_odm_combine_policy_4to1) { + if (HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH * 4) { + *ODMMode = dm_odm_combine_mode_disabled; + *NumberOfDPP = 0; + *TotalAvailablePipesSupport = false; + } else if (HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH * 2 || + *ODMMode == dm_odm_combine_mode_4to1) { + *ODMMode = dm_odm_combine_mode_4to1; + *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne; + *NumberOfDPP = 4; + } else { + *ODMMode = dm_odm_combine_mode_2to1; + *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne; + *NumberOfDPP = 2; + } + } + if (Output == dm_hdmi && OutFormat == dm_420 && + HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH) { + *ODMMode = dm_odm_combine_mode_disabled; + *NumberOfDPP = 0; + *TotalAvailablePipesSupport = false; + } +} + +double dml32_CalculateRequiredDispclk( + enum odm_combine_mode ODMMode, + double PixelClock, + double DISPCLKDPPCLKDSCCLKDownSpreading, + double DISPCLKRampingMargin, + double DISPCLKDPPCLKVCOSpeed, + double MaxDispclk) +{ + double RequiredDispclk = 0.; + double PixelClockAfterODM; + double DISPCLKWithRampingRoundedToDFSGranularity; + double DISPCLKWithoutRampingRoundedToDFSGranularity; + double MaxDispclkRoundedDownToDFSGranularity; + + if (ODMMode == dm_odm_combine_mode_4to1) + PixelClockAfterODM = PixelClock / 4; + else if (ODMMode == dm_odm_combine_mode_2to1) + PixelClockAfterODM = PixelClock / 2; + else + PixelClockAfterODM = PixelClock; + + + DISPCLKWithRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity( + PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100) + * (1 + DISPCLKRampingMargin / 100), 1, DISPCLKDPPCLKVCOSpeed); + + DISPCLKWithoutRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity( + PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100), 1, DISPCLKDPPCLKVCOSpeed); + + MaxDispclkRoundedDownToDFSGranularity = dml32_RoundToDFSGranularity(MaxDispclk, 0, DISPCLKDPPCLKVCOSpeed); + + if (DISPCLKWithoutRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity) + RequiredDispclk = DISPCLKWithoutRampingRoundedToDFSGranularity; + else if (DISPCLKWithRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity) + RequiredDispclk = MaxDispclkRoundedDownToDFSGranularity; + else + RequiredDispclk = DISPCLKWithRampingRoundedToDFSGranularity; + + return RequiredDispclk; +} + +double dml32_RoundToDFSGranularity(double Clock, bool round_up, double VCOSpeed) +{ + if (Clock <= 0.0) + return 0.0; + + if (round_up) + return VCOSpeed * 4.0 / dml_floor(VCOSpeed * 4.0 / Clock, 1.0); + else + return VCOSpeed * 4.0 / dml_ceil(VCOSpeed * 4.0 / Clock, 1.0); +} + +void dml32_CalculateOutputLink( + double PHYCLKPerState, + double PHYCLKD18PerState, + double PHYCLKD32PerState, + double Downspreading, + bool IsMainSurfaceUsingTheIndicatedTiming, + enum output_encoder_class Output, + enum output_format_class OutputFormat, + unsigned int HTotal, + unsigned int HActive, + double PixelClockBackEnd, + double ForcedOutputLinkBPP, + unsigned int DSCInputBitPerComponent, + unsigned int NumberOfDSCSlices, + double AudioSampleRate, + unsigned int AudioSampleLayout, + enum odm_combine_mode ODMModeNoDSC, + enum odm_combine_mode ODMModeDSC, + bool DSCEnable, + unsigned int OutputLinkDPLanes, + enum dm_output_link_dp_rate OutputLinkDPRate, + + /* Output */ + bool *RequiresDSC, + double *RequiresFEC, + double *OutBpp, + enum dm_output_type *OutputType, + enum dm_output_rate *OutputRate, + unsigned int *RequiredSlots) +{ + bool LinkDSCEnable; + unsigned int dummy; + *RequiresDSC = false; + *RequiresFEC = false; + *OutBpp = 0; + *OutputType = dm_output_type_unknown; + *OutputRate = dm_output_rate_unknown; + + if (IsMainSurfaceUsingTheIndicatedTiming) { + if (Output == dm_hdmi) { + *RequiresDSC = false; + *RequiresFEC = false; + *OutBpp = dml32_TruncToValidBPP(dml_min(600, PHYCLKPerState) * 10, 3, HTotal, HActive, + PixelClockBackEnd, ForcedOutputLinkBPP, false, Output, OutputFormat, + DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout, + ODMModeNoDSC, ODMModeDSC, &dummy); + //OutputTypeAndRate = "HDMI"; + *OutputType = dm_output_type_hdmi; + + } else if (Output == dm_dp || Output == dm_dp2p0 || Output == dm_edp) { + if (DSCEnable == true) { + *RequiresDSC = true; + LinkDSCEnable = true; + if (Output == dm_dp || Output == dm_dp2p0) + *RequiresFEC = true; + else + *RequiresFEC = false; + } else { + *RequiresDSC = false; + LinkDSCEnable = false; + if (Output == dm_dp2p0) + *RequiresFEC = true; + else + *RequiresFEC = false; + } + if (Output == dm_dp2p0) { + *OutBpp = 0; + if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr10) && + PHYCLKD32PerState >= 10000 / 32) { + *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000, + OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, + ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, + DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, + AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + if (*OutBpp == 0 && PHYCLKD32PerState < 13500 / 32 && DSCEnable == true && + ForcedOutputLinkBPP == 0) { + *RequiresDSC = true; + LinkDSCEnable = true; + *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000, + OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, + ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, + NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout, + ODMModeNoDSC, ODMModeDSC, RequiredSlots); + } + //OutputTypeAndRate = Output & " UHBR10"; + *OutputType = dm_output_type_dp2p0; + *OutputRate = dm_output_rate_dp_rate_uhbr10; + } + if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr13p5) && + *OutBpp == 0 && PHYCLKD32PerState >= 13500 / 32) { + *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500, + OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, + ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, + DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, + AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + + if (*OutBpp == 0 && PHYCLKD32PerState < 20000 / 32 && DSCEnable == true && + ForcedOutputLinkBPP == 0) { + *RequiresDSC = true; + LinkDSCEnable = true; + *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500, + OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, + ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, + NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout, + ODMModeNoDSC, ODMModeDSC, RequiredSlots); + } + //OutputTypeAndRate = Output & " UHBR13p5"; + *OutputType = dm_output_type_dp2p0; + *OutputRate = dm_output_rate_dp_rate_uhbr13p5; + } + if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr20) && + *OutBpp == 0 && PHYCLKD32PerState >= 20000 / 32) { + *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000, + OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, + ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, + DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, + AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) { + *RequiresDSC = true; + LinkDSCEnable = true; + *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000, + OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, + ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, + NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout, + ODMModeNoDSC, ODMModeDSC, RequiredSlots); + } + //OutputTypeAndRate = Output & " UHBR20"; + *OutputType = dm_output_type_dp2p0; + *OutputRate = dm_output_rate_dp_rate_uhbr20; + } + } else { + *OutBpp = 0; + if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr) && + PHYCLKPerState >= 270) { + *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700, + OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, + ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, + DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, + AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + if (*OutBpp == 0 && PHYCLKPerState < 540 && DSCEnable == true && + ForcedOutputLinkBPP == 0) { + *RequiresDSC = true; + LinkDSCEnable = true; + if (Output == dm_dp) + *RequiresFEC = true; + *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700, + OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, + ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, + NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout, + ODMModeNoDSC, ODMModeDSC, RequiredSlots); + } + //OutputTypeAndRate = Output & " HBR"; + *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp; + *OutputRate = dm_output_rate_dp_rate_hbr; + } + if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr2) && + *OutBpp == 0 && PHYCLKPerState >= 540) { + *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400, + OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, + ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, + DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, + AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + + if (*OutBpp == 0 && PHYCLKPerState < 810 && DSCEnable == true && + ForcedOutputLinkBPP == 0) { + *RequiresDSC = true; + LinkDSCEnable = true; + if (Output == dm_dp) + *RequiresFEC = true; + + *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400, + OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, + ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, + NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout, + ODMModeNoDSC, ODMModeDSC, RequiredSlots); + } + //OutputTypeAndRate = Output & " HBR2"; + *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp; + *OutputRate = dm_output_rate_dp_rate_hbr2; + } + if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr3) && *OutBpp == 0 && PHYCLKPerState >= 810) { + *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100, + OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, + ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, + AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, + RequiredSlots); + + if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) { + *RequiresDSC = true; + LinkDSCEnable = true; + if (Output == dm_dp) + *RequiresFEC = true; + + *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100, + OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, + ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, + NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout, + ODMModeNoDSC, ODMModeDSC, RequiredSlots); + } + //OutputTypeAndRate = Output & " HBR3"; + *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp; + *OutputRate = dm_output_rate_dp_rate_hbr3; + } + } + } + } +} + +void dml32_CalculateDPPCLK( + unsigned int NumberOfActiveSurfaces, + double DISPCLKDPPCLKDSCCLKDownSpreading, + double DISPCLKDPPCLKVCOSpeed, + double DPPCLKUsingSingleDPP[], + unsigned int DPPPerSurface[], + + /* output */ + double *GlobalDPPCLK, + double Dppclk[]) +{ + unsigned int k; + *GlobalDPPCLK = 0; + for (k = 0; k < NumberOfActiveSurfaces; ++k) { + Dppclk[k] = DPPCLKUsingSingleDPP[k] / DPPPerSurface[k] * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100); + *GlobalDPPCLK = dml_max(*GlobalDPPCLK, Dppclk[k]); + } + *GlobalDPPCLK = dml32_RoundToDFSGranularity(*GlobalDPPCLK, 1, DISPCLKDPPCLKVCOSpeed); + for (k = 0; k < NumberOfActiveSurfaces; ++k) + Dppclk[k] = *GlobalDPPCLK / 255 * dml_ceil(Dppclk[k] * 255.0 / *GlobalDPPCLK, 1.0); +} + +double dml32_TruncToValidBPP( + double LinkBitRate, + unsigned int Lanes, + unsigned int HTotal, + unsigned int HActive, + double PixelClock, + double DesiredBPP, + bool DSCEnable, + enum output_encoder_class Output, + enum output_format_class Format, + unsigned int DSCInputBitPerComponent, + unsigned int DSCSlices, + unsigned int AudioRate, + unsigned int AudioLayout, + enum odm_combine_mode ODMModeNoDSC, + enum odm_combine_mode ODMModeDSC, + /* Output */ + unsigned int *RequiredSlots) +{ + double MaxLinkBPP; + unsigned int MinDSCBPP; + double MaxDSCBPP; + unsigned int NonDSCBPP0; + unsigned int NonDSCBPP1; + unsigned int NonDSCBPP2; + unsigned int NonDSCBPP3; + + if (Format == dm_420) { + NonDSCBPP0 = 12; + NonDSCBPP1 = 15; + NonDSCBPP2 = 18; + MinDSCBPP = 6; + MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1 / 16; + } else if (Format == dm_444) { + NonDSCBPP0 = 18; + NonDSCBPP1 = 24; + NonDSCBPP2 = 30; + NonDSCBPP3 = 36; + MinDSCBPP = 8; + MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16; + } else { + if (Output == dm_hdmi) { + NonDSCBPP0 = 24; + NonDSCBPP1 = 24; + NonDSCBPP2 = 24; + } else { + NonDSCBPP0 = 16; + NonDSCBPP1 = 20; + NonDSCBPP2 = 24; + } + if (Format == dm_n422) { + MinDSCBPP = 7; + MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0; + } else { + MinDSCBPP = 8; + MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0; + } + } + if (Output == dm_dp2p0) { + MaxLinkBPP = LinkBitRate * Lanes / PixelClock * 128 / 132 * 383 / 384 * 65536 / 65540; + } else if (DSCEnable && Output == dm_dp) { + MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100); + } else { + MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock; + } + + if (DSCEnable) { + if (ODMModeDSC == dm_odm_combine_mode_4to1) + MaxLinkBPP = dml_min(MaxLinkBPP, 16); + else if (ODMModeDSC == dm_odm_combine_mode_2to1) + MaxLinkBPP = dml_min(MaxLinkBPP, 32); + else if (ODMModeDSC == dm_odm_split_mode_1to2) + MaxLinkBPP = 2 * MaxLinkBPP; + } else { + if (ODMModeNoDSC == dm_odm_combine_mode_4to1) + MaxLinkBPP = dml_min(MaxLinkBPP, 16); + else if (ODMModeNoDSC == dm_odm_combine_mode_2to1) + MaxLinkBPP = dml_min(MaxLinkBPP, 32); + else if (ODMModeNoDSC == dm_odm_split_mode_1to2) + MaxLinkBPP = 2 * MaxLinkBPP; + } + + if (DesiredBPP == 0) { + if (DSCEnable) { + if (MaxLinkBPP < MinDSCBPP) + return BPP_INVALID; + else if (MaxLinkBPP >= MaxDSCBPP) + return MaxDSCBPP; + else + return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0; + } else { + if (MaxLinkBPP >= NonDSCBPP3) + return NonDSCBPP3; + else if (MaxLinkBPP >= NonDSCBPP2) + return NonDSCBPP2; + else if (MaxLinkBPP >= NonDSCBPP1) + return NonDSCBPP1; + else if (MaxLinkBPP >= NonDSCBPP0) + return 16.0; + else + return BPP_INVALID; + } + } else { + if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || + DesiredBPP == NonDSCBPP0 || DesiredBPP == NonDSCBPP3)) || + (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) + return BPP_INVALID; + else + return DesiredBPP; + } + + *RequiredSlots = dml_ceil(DesiredBPP / MaxLinkBPP * 64, 1); + + return BPP_INVALID; +} // TruncToValidBPP + +double dml32_RequiredDTBCLK( + bool DSCEnable, + double PixelClock, + enum output_format_class OutputFormat, + double OutputBpp, + unsigned int DSCSlices, + unsigned int HTotal, + unsigned int HActive, + unsigned int AudioRate, + unsigned int AudioLayout) +{ + double PixelWordRate; + double HCActive; + double HCBlank; + double AverageTribyteRate; + double HActiveTribyteRate; + + if (DSCEnable != true) + return dml_max(PixelClock / 4.0 * OutputBpp / 24.0, 25.0); + + PixelWordRate = PixelClock / (OutputFormat == dm_444 ? 1 : 2); + HCActive = dml_ceil(DSCSlices * dml_ceil(OutputBpp * + dml_ceil(HActive / DSCSlices, 1) / 8.0, 1) / 3.0, 1); + HCBlank = 64 + 32 * + dml_ceil(AudioRate * (AudioLayout == 1 ? 1 : 0.25) * HTotal / (PixelClock * 1000), 1); + AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal; + HActiveTribyteRate = PixelWordRate * HCActive / HActive; + return dml_max4(PixelWordRate / 4.0, AverageTribyteRate / 4.0, HActiveTribyteRate / 4.0, 25.0) * 1.002; +} + +unsigned int dml32_DSCDelayRequirement(bool DSCEnabled, + enum odm_combine_mode ODMMode, + unsigned int DSCInputBitPerComponent, + double OutputBpp, + unsigned int HActive, + unsigned int HTotal, + unsigned int NumberOfDSCSlices, + enum output_format_class OutputFormat, + enum output_encoder_class Output, + double PixelClock, + double PixelClockBackEnd, + double dsc_delay_factor_wa) +{ + unsigned int DSCDelayRequirement_val; + + if (DSCEnabled == true && OutputBpp != 0) { + if (ODMMode == dm_odm_combine_mode_4to1) { + DSCDelayRequirement_val = 4 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp, + dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 4, + OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output)); + } else if (ODMMode == dm_odm_combine_mode_2to1) { + DSCDelayRequirement_val = 2 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp, + dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 2, + OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output)); + } else { + DSCDelayRequirement_val = dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp, + dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices, + OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output); + } + + DSCDelayRequirement_val = DSCDelayRequirement_val + (HTotal - HActive) * + dml_ceil((double)DSCDelayRequirement_val / HActive, 1); + + DSCDelayRequirement_val = DSCDelayRequirement_val * PixelClock / PixelClockBackEnd; + + } else { + DSCDelayRequirement_val = 0; + } + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: DSCEnabled = %d\n", __func__, DSCEnabled); + dml_print("DML::%s: OutputBpp = %f\n", __func__, OutputBpp); + dml_print("DML::%s: HActive = %d\n", __func__, HActive); + dml_print("DML::%s: OutputFormat = %d\n", __func__, OutputFormat); + dml_print("DML::%s: DSCInputBitPerComponent = %d\n", __func__, DSCInputBitPerComponent); + dml_print("DML::%s: NumberOfDSCSlices = %d\n", __func__, NumberOfDSCSlices); + dml_print("DML::%s: DSCDelayRequirement_val = %d\n", __func__, DSCDelayRequirement_val); +#endif + + return dml_ceil(DSCDelayRequirement_val * dsc_delay_factor_wa, 1); +} + +void dml32_CalculateSurfaceSizeInMall( + unsigned int NumberOfActiveSurfaces, + unsigned int MALLAllocatedForDCN, + enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[], + bool DCCEnable[], + bool ViewportStationary[], + unsigned int ViewportXStartY[], + unsigned int ViewportYStartY[], + unsigned int ViewportXStartC[], + unsigned int ViewportYStartC[], + unsigned int ViewportWidthY[], + unsigned int ViewportHeightY[], + unsigned int BytesPerPixelY[], + unsigned int ViewportWidthC[], + unsigned int ViewportHeightC[], + unsigned int BytesPerPixelC[], + unsigned int SurfaceWidthY[], + unsigned int SurfaceWidthC[], + unsigned int SurfaceHeightY[], + unsigned int SurfaceHeightC[], + unsigned int Read256BytesBlockWidthY[], + unsigned int Read256BytesBlockWidthC[], + unsigned int Read256BytesBlockHeightY[], + unsigned int Read256BytesBlockHeightC[], + unsigned int ReadBlockWidthY[], + unsigned int ReadBlockWidthC[], + unsigned int ReadBlockHeightY[], + unsigned int ReadBlockHeightC[], + + /* Output */ + unsigned int SurfaceSizeInMALL[], + bool *ExceededMALLSize) +{ + unsigned int TotalSurfaceSizeInMALL = 0; + unsigned int k; + + for (k = 0; k < NumberOfActiveSurfaces; ++k) { + if (ViewportStationary[k]) { + SurfaceSizeInMALL[k] = dml_min(dml_ceil(SurfaceWidthY[k], ReadBlockWidthY[k]), + dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + ReadBlockWidthY[k] - 1, + ReadBlockWidthY[k]) - dml_floor(ViewportXStartY[k], + ReadBlockWidthY[k])) * dml_min(dml_ceil(SurfaceHeightY[k], + ReadBlockHeightY[k]), dml_floor(ViewportYStartY[k] + + ViewportHeightY[k] + ReadBlockHeightY[k] - 1, ReadBlockHeightY[k]) - + dml_floor(ViewportYStartY[k], ReadBlockHeightY[k])) * BytesPerPixelY[k]; + + if (ReadBlockWidthC[k] > 0) { + SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] + + dml_min(dml_ceil(SurfaceWidthC[k], ReadBlockWidthC[k]), + dml_floor(ViewportXStartC[k] + ViewportWidthC[k] + + ReadBlockWidthC[k] - 1, ReadBlockWidthC[k]) - + dml_floor(ViewportXStartC[k], ReadBlockWidthC[k])) * + dml_min(dml_ceil(SurfaceHeightC[k], ReadBlockHeightC[k]), + dml_floor(ViewportYStartC[k] + ViewportHeightC[k] + + ReadBlockHeightC[k] - 1, ReadBlockHeightC[k]) - + dml_floor(ViewportYStartC[k], ReadBlockHeightC[k])) * + BytesPerPixelC[k]; + } + if (DCCEnable[k] == true) { + SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] + + dml_min(dml_ceil(SurfaceWidthY[k], 8 * Read256BytesBlockWidthY[k]), + dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + 8 * + Read256BytesBlockWidthY[k] - 1, 8 * Read256BytesBlockWidthY[k]) + - dml_floor(ViewportXStartY[k], 8 * Read256BytesBlockWidthY[k])) + * dml_min(dml_ceil(SurfaceHeightY[k], 8 * + Read256BytesBlockHeightY[k]), dml_floor(ViewportYStartY[k] + + ViewportHeightY[k] + 8 * Read256BytesBlockHeightY[k] - 1, 8 * + Read256BytesBlockHeightY[k]) - dml_floor(ViewportYStartY[k], 8 + * Read256BytesBlockHeightY[k])) * BytesPerPixelY[k] / 256; + if (Read256BytesBlockWidthC[k] > 0) { + SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] + + dml_min(dml_ceil(SurfaceWidthC[k], 8 * + Read256BytesBlockWidthC[k]), + dml_floor(ViewportXStartC[k] + ViewportWidthC[k] + 8 + * Read256BytesBlockWidthC[k] - 1, 8 * + Read256BytesBlockWidthC[k]) - + dml_floor(ViewportXStartC[k], 8 * + Read256BytesBlockWidthC[k])) * + dml_min(dml_ceil(SurfaceHeightC[k], 8 * + Read256BytesBlockHeightC[k]), + dml_floor(ViewportYStartC[k] + ViewportHeightC[k] + + 8 * Read256BytesBlockHeightC[k] - 1, 8 * + Read256BytesBlockHeightC[k]) - + dml_floor(ViewportYStartC[k], 8 * + Read256BytesBlockHeightC[k])) * + BytesPerPixelC[k] / 256; + } + } + } else { + SurfaceSizeInMALL[k] = dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] + + ReadBlockWidthY[k] - 1), ReadBlockWidthY[k]) * + dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] + + ReadBlockHeightY[k] - 1), ReadBlockHeightY[k]) * + BytesPerPixelY[k]; + if (ReadBlockWidthC[k] > 0) { + SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] + + dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] + + ReadBlockWidthC[k] - 1), ReadBlockWidthC[k]) * + dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] + + ReadBlockHeightC[k] - 1), ReadBlockHeightC[k]) * + BytesPerPixelC[k]; + } + if (DCCEnable[k] == true) { + SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] + + dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] + 8 * + Read256BytesBlockWidthY[k] - 1), 8 * + Read256BytesBlockWidthY[k]) * + dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] + 8 * + Read256BytesBlockHeightY[k] - 1), 8 * + Read256BytesBlockHeightY[k]) * BytesPerPixelY[k] / 256; + + if (Read256BytesBlockWidthC[k] > 0) { + SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] + + dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] + 8 * + Read256BytesBlockWidthC[k] - 1), 8 * + Read256BytesBlockWidthC[k]) * + dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] + 8 * + Read256BytesBlockHeightC[k] - 1), 8 * + Read256BytesBlockHeightC[k]) * + BytesPerPixelC[k] / 256; + } + } + } + } + + for (k = 0; k < NumberOfActiveSurfaces; ++k) { + if (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable) + TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k]; + } + *ExceededMALLSize = (TotalSurfaceSizeInMALL > MALLAllocatedForDCN * 1024 * 1024); +} // CalculateSurfaceSizeInMall + +void dml32_CalculateVMRowAndSwath( + unsigned int NumberOfActiveSurfaces, + DmlPipe myPipe[], + unsigned int SurfaceSizeInMALL[], + unsigned int PTEBufferSizeInRequestsLuma, + unsigned int PTEBufferSizeInRequestsChroma, + unsigned int DCCMetaBufferSizeBytes, + enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[], + enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], + unsigned int MALLAllocatedForDCN, + double SwathWidthY[], + double SwathWidthC[], + bool GPUVMEnable, + bool HostVMEnable, + unsigned int HostVMMaxNonCachedPageTableLevels, + unsigned int GPUVMMaxPageTableLevels, + unsigned int GPUVMMinPageSizeKBytes[], + unsigned int HostVMMinPageSize, + + /* Output */ + bool PTEBufferSizeNotExceeded[], + bool DCCMetaBufferSizeNotExceeded[], + unsigned int dpte_row_width_luma_ub[], + unsigned int dpte_row_width_chroma_ub[], + unsigned int dpte_row_height_luma[], + unsigned int dpte_row_height_chroma[], + unsigned int dpte_row_height_linear_luma[], // VBA_DELTA + unsigned int dpte_row_height_linear_chroma[], // VBA_DELTA + unsigned int meta_req_width[], + unsigned int meta_req_width_chroma[], + unsigned int meta_req_height[], + unsigned int meta_req_height_chroma[], + unsigned int meta_row_width[], + unsigned int meta_row_width_chroma[], + unsigned int meta_row_height[], + unsigned int meta_row_height_chroma[], + unsigned int vm_group_bytes[], + unsigned int dpte_group_bytes[], + unsigned int PixelPTEReqWidthY[], + unsigned int PixelPTEReqHeightY[], + unsigned int PTERequestSizeY[], + unsigned int PixelPTEReqWidthC[], + unsigned int PixelPTEReqHeightC[], + unsigned int PTERequestSizeC[], + unsigned int dpde0_bytes_per_frame_ub_l[], + unsigned int meta_pte_bytes_per_frame_ub_l[], + unsigned int dpde0_bytes_per_frame_ub_c[], + unsigned int meta_pte_bytes_per_frame_ub_c[], + double PrefetchSourceLinesY[], + double PrefetchSourceLinesC[], + double VInitPreFillY[], + double VInitPreFillC[], + unsigned int MaxNumSwathY[], + unsigned int MaxNumSwathC[], + double meta_row_bw[], + double dpte_row_bw[], + double PixelPTEBytesPerRow[], + double PDEAndMetaPTEBytesFrame[], + double MetaRowByte[], + bool use_one_row_for_frame[], + bool use_one_row_for_frame_flip[], + bool UsesMALLForStaticScreen[], + bool PTE_BUFFER_MODE[], + unsigned int BIGK_FRAGMENT_SIZE[]) +{ + unsigned int k; + unsigned int PTEBufferSizeInRequestsForLuma[DC__NUM_DPP__MAX]; + unsigned int PTEBufferSizeInRequestsForChroma[DC__NUM_DPP__MAX]; + unsigned int PDEAndMetaPTEBytesFrameY; + unsigned int PDEAndMetaPTEBytesFrameC; + unsigned int MetaRowByteY[DC__NUM_DPP__MAX]; + unsigned int MetaRowByteC[DC__NUM_DPP__MAX]; + unsigned int PixelPTEBytesPerRowY[DC__NUM_DPP__MAX]; + unsigned int PixelPTEBytesPerRowC[DC__NUM_DPP__MAX]; + unsigned int PixelPTEBytesPerRowY_one_row_per_frame[DC__NUM_DPP__MAX]; + unsigned int PixelPTEBytesPerRowC_one_row_per_frame[DC__NUM_DPP__MAX]; + unsigned int dpte_row_width_luma_ub_one_row_per_frame[DC__NUM_DPP__MAX]; + unsigned int dpte_row_height_luma_one_row_per_frame[DC__NUM_DPP__MAX]; + unsigned int dpte_row_width_chroma_ub_one_row_per_frame[DC__NUM_DPP__MAX]; + unsigned int dpte_row_height_chroma_one_row_per_frame[DC__NUM_DPP__MAX]; + bool one_row_per_frame_fits_in_buffer[DC__NUM_DPP__MAX]; + + for (k = 0; k < NumberOfActiveSurfaces; ++k) { + if (HostVMEnable == true) { + vm_group_bytes[k] = 512; + dpte_group_bytes[k] = 512; + } else if (GPUVMEnable == true) { + vm_group_bytes[k] = 2048; + if (GPUVMMinPageSizeKBytes[k] >= 64 && IsVertical(myPipe[k].SourceRotation)) + dpte_group_bytes[k] = 512; + else + dpte_group_bytes[k] = 2048; + } else { + vm_group_bytes[k] = 0; + dpte_group_bytes[k] = 0; + } + + if (myPipe[k].SourcePixelFormat == dm_420_8 || myPipe[k].SourcePixelFormat == dm_420_10 || + myPipe[k].SourcePixelFormat == dm_420_12 || + myPipe[k].SourcePixelFormat == dm_rgbe_alpha) { + if ((myPipe[k].SourcePixelFormat == dm_420_10 || myPipe[k].SourcePixelFormat == dm_420_12) && + !IsVertical(myPipe[k].SourceRotation)) { + PTEBufferSizeInRequestsForLuma[k] = + (PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma) / 2; + PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsForLuma[k]; + } else { + PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma; + PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsChroma; + } + + PDEAndMetaPTEBytesFrameC = dml32_CalculateVMAndRowBytes( + myPipe[k].ViewportStationary, + myPipe[k].DCCEnable, + myPipe[k].DPPPerSurface, + myPipe[k].BlockHeight256BytesC, + myPipe[k].BlockWidth256BytesC, + myPipe[k].SourcePixelFormat, + myPipe[k].SurfaceTiling, + myPipe[k].BytePerPixelC, + myPipe[k].SourceRotation, + SwathWidthC[k], + myPipe[k].ViewportHeightChroma, + myPipe[k].ViewportXStartC, + myPipe[k].ViewportYStartC, + GPUVMEnable, + HostVMEnable, + HostVMMaxNonCachedPageTableLevels, + GPUVMMaxPageTableLevels, + GPUVMMinPageSizeKBytes[k], + HostVMMinPageSize, + PTEBufferSizeInRequestsForChroma[k], + myPipe[k].PitchC, + myPipe[k].DCCMetaPitchC, + myPipe[k].BlockWidthC, + myPipe[k].BlockHeightC, + + /* Output */ + &MetaRowByteC[k], + &PixelPTEBytesPerRowC[k], + &dpte_row_width_chroma_ub[k], + &dpte_row_height_chroma[k], + &dpte_row_height_linear_chroma[k], + &PixelPTEBytesPerRowC_one_row_per_frame[k], + &dpte_row_width_chroma_ub_one_row_per_frame[k], + &dpte_row_height_chroma_one_row_per_frame[k], + &meta_req_width_chroma[k], + &meta_req_height_chroma[k], + &meta_row_width_chroma[k], + &meta_row_height_chroma[k], + &PixelPTEReqWidthC[k], + &PixelPTEReqHeightC[k], + &PTERequestSizeC[k], + &dpde0_bytes_per_frame_ub_c[k], + &meta_pte_bytes_per_frame_ub_c[k]); + + PrefetchSourceLinesC[k] = dml32_CalculatePrefetchSourceLines( + myPipe[k].VRatioChroma, + myPipe[k].VTapsChroma, + myPipe[k].InterlaceEnable, + myPipe[k].ProgressiveToInterlaceUnitInOPP, + myPipe[k].SwathHeightC, + myPipe[k].SourceRotation, + myPipe[k].ViewportStationary, + SwathWidthC[k], + myPipe[k].ViewportHeightChroma, + myPipe[k].ViewportXStartC, + myPipe[k].ViewportYStartC, + + /* Output */ + &VInitPreFillC[k], + &MaxNumSwathC[k]); + } else { + PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma; + PTEBufferSizeInRequestsForChroma[k] = 0; + PixelPTEBytesPerRowC[k] = 0; + PDEAndMetaPTEBytesFrameC = 0; + MetaRowByteC[k] = 0; + MaxNumSwathC[k] = 0; + PrefetchSourceLinesC[k] = 0; + dpte_row_height_chroma_one_row_per_frame[k] = 0; + dpte_row_width_chroma_ub_one_row_per_frame[k] = 0; + PixelPTEBytesPerRowC_one_row_per_frame[k] = 0; + } + + PDEAndMetaPTEBytesFrameY = dml32_CalculateVMAndRowBytes( + myPipe[k].ViewportStationary, + myPipe[k].DCCEnable, + myPipe[k].DPPPerSurface, + myPipe[k].BlockHeight256BytesY, + myPipe[k].BlockWidth256BytesY, + myPipe[k].SourcePixelFormat, + myPipe[k].SurfaceTiling, + myPipe[k].BytePerPixelY, + myPipe[k].SourceRotation, + SwathWidthY[k], + myPipe[k].ViewportHeight, + myPipe[k].ViewportXStart, + myPipe[k].ViewportYStart, + GPUVMEnable, + HostVMEnable, + HostVMMaxNonCachedPageTableLevels, + GPUVMMaxPageTableLevels, + GPUVMMinPageSizeKBytes[k], + HostVMMinPageSize, + PTEBufferSizeInRequestsForLuma[k], + myPipe[k].PitchY, + myPipe[k].DCCMetaPitchY, + myPipe[k].BlockWidthY, + myPipe[k].BlockHeightY, + + /* Output */ + &MetaRowByteY[k], + &PixelPTEBytesPerRowY[k], + &dpte_row_width_luma_ub[k], + &dpte_row_height_luma[k], + &dpte_row_height_linear_luma[k], + &PixelPTEBytesPerRowY_one_row_per_frame[k], + &dpte_row_width_luma_ub_one_row_per_frame[k], + &dpte_row_height_luma_one_row_per_frame[k], + &meta_req_width[k], + &meta_req_height[k], + &meta_row_width[k], + &meta_row_height[k], + &PixelPTEReqWidthY[k], + &PixelPTEReqHeightY[k], + &PTERequestSizeY[k], + &dpde0_bytes_per_frame_ub_l[k], + &meta_pte_bytes_per_frame_ub_l[k]); + + PrefetchSourceLinesY[k] = dml32_CalculatePrefetchSourceLines( + myPipe[k].VRatio, + myPipe[k].VTaps, + myPipe[k].InterlaceEnable, + myPipe[k].ProgressiveToInterlaceUnitInOPP, + myPipe[k].SwathHeightY, + myPipe[k].SourceRotation, + myPipe[k].ViewportStationary, + SwathWidthY[k], + myPipe[k].ViewportHeight, + myPipe[k].ViewportXStart, + myPipe[k].ViewportYStart, + + /* Output */ + &VInitPreFillY[k], + &MaxNumSwathY[k]); + + PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC; + MetaRowByte[k] = MetaRowByteY[k] + MetaRowByteC[k]; + + if (PixelPTEBytesPerRowY[k] <= 64 * PTEBufferSizeInRequestsForLuma[k] && + PixelPTEBytesPerRowC[k] <= 64 * PTEBufferSizeInRequestsForChroma[k]) { + PTEBufferSizeNotExceeded[k] = true; + } else { + PTEBufferSizeNotExceeded[k] = false; + } + + one_row_per_frame_fits_in_buffer[k] = (PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 * + PTEBufferSizeInRequestsForLuma[k] && + PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * PTEBufferSizeInRequestsForChroma[k]); + } + + dml32_CalculateMALLUseForStaticScreen( + NumberOfActiveSurfaces, + MALLAllocatedForDCN, + UseMALLForStaticScreen, // mode + SurfaceSizeInMALL, + one_row_per_frame_fits_in_buffer, + /* Output */ + UsesMALLForStaticScreen); // boolen + + for (k = 0; k < NumberOfActiveSurfaces; ++k) { + PTE_BUFFER_MODE[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] || + (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) || + (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) || + (GPUVMMinPageSizeKBytes[k] > 64); + BIGK_FRAGMENT_SIZE[k] = dml_log2(GPUVMMinPageSizeKBytes[k] * 1024) - 12; + } + + for (k = 0; k < NumberOfActiveSurfaces; ++k) { +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%d, SurfaceSizeInMALL = %d\n", __func__, k, SurfaceSizeInMALL[k]); + dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n", __func__, k, UsesMALLForStaticScreen[k]); +#endif + use_one_row_for_frame[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] || + (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) || + (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) || + (GPUVMMinPageSizeKBytes[k] > 64 && IsVertical(myPipe[k].SourceRotation)); + + use_one_row_for_frame_flip[k] = use_one_row_for_frame[k] && + !(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame); + + if (use_one_row_for_frame[k]) { + dpte_row_height_luma[k] = dpte_row_height_luma_one_row_per_frame[k]; + dpte_row_width_luma_ub[k] = dpte_row_width_luma_ub_one_row_per_frame[k]; + PixelPTEBytesPerRowY[k] = PixelPTEBytesPerRowY_one_row_per_frame[k]; + dpte_row_height_chroma[k] = dpte_row_height_chroma_one_row_per_frame[k]; + dpte_row_width_chroma_ub[k] = dpte_row_width_chroma_ub_one_row_per_frame[k]; + PixelPTEBytesPerRowC[k] = PixelPTEBytesPerRowC_one_row_per_frame[k]; + PTEBufferSizeNotExceeded[k] = one_row_per_frame_fits_in_buffer[k]; + } + + if (MetaRowByte[k] <= DCCMetaBufferSizeBytes) + DCCMetaBufferSizeNotExceeded[k] = true; + else + DCCMetaBufferSizeNotExceeded[k] = false; + + PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY[k] + PixelPTEBytesPerRowC[k]; + if (use_one_row_for_frame[k]) + PixelPTEBytesPerRow[k] = PixelPTEBytesPerRow[k] / 2; + + dml32_CalculateRowBandwidth( + GPUVMEnable, + myPipe[k].SourcePixelFormat, + myPipe[k].VRatio, + myPipe[k].VRatioChroma, + myPipe[k].DCCEnable, + myPipe[k].HTotal / myPipe[k].PixelClock, + MetaRowByteY[k], MetaRowByteC[k], + meta_row_height[k], + meta_row_height_chroma[k], + PixelPTEBytesPerRowY[k], + PixelPTEBytesPerRowC[k], + dpte_row_height_luma[k], + dpte_row_height_chroma[k], + + /* Output */ + &meta_row_bw[k], + &dpte_row_bw[k]); +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%d, use_one_row_for_frame = %d\n", __func__, k, use_one_row_for_frame[k]); + dml_print("DML::%s: k=%d, use_one_row_for_frame_flip = %d\n", + __func__, k, use_one_row_for_frame_flip[k]); + dml_print("DML::%s: k=%d, UseMALLForPStateChange = %d\n", + __func__, k, UseMALLForPStateChange[k]); + dml_print("DML::%s: k=%d, dpte_row_height_luma = %d\n", __func__, k, dpte_row_height_luma[k]); + dml_print("DML::%s: k=%d, dpte_row_width_luma_ub = %d\n", + __func__, k, dpte_row_width_luma_ub[k]); + dml_print("DML::%s: k=%d, PixelPTEBytesPerRowY = %d\n", __func__, k, PixelPTEBytesPerRowY[k]); + dml_print("DML::%s: k=%d, dpte_row_height_chroma = %d\n", + __func__, k, dpte_row_height_chroma[k]); + dml_print("DML::%s: k=%d, dpte_row_width_chroma_ub = %d\n", + __func__, k, dpte_row_width_chroma_ub[k]); + dml_print("DML::%s: k=%d, PixelPTEBytesPerRowC = %d\n", __func__, k, PixelPTEBytesPerRowC[k]); + dml_print("DML::%s: k=%d, PixelPTEBytesPerRow = %d\n", __func__, k, PixelPTEBytesPerRow[k]); + dml_print("DML::%s: k=%d, PTEBufferSizeNotExceeded = %d\n", + __func__, k, PTEBufferSizeNotExceeded[k]); + dml_print("DML::%s: k=%d, PTE_BUFFER_MODE = %d\n", __func__, k, PTE_BUFFER_MODE[k]); + dml_print("DML::%s: k=%d, BIGK_FRAGMENT_SIZE = %d\n", __func__, k, BIGK_FRAGMENT_SIZE[k]); +#endif + } +} // CalculateVMRowAndSwath + +unsigned int dml32_CalculateVMAndRowBytes( + bool ViewportStationary, + bool DCCEnable, + unsigned int NumberOfDPPs, + unsigned int BlockHeight256Bytes, + unsigned int BlockWidth256Bytes, + enum source_format_class SourcePixelFormat, + unsigned int SurfaceTiling, + unsigned int BytePerPixel, + enum dm_rotation_angle SourceRotation, + double SwathWidth, + unsigned int ViewportHeight, + unsigned int ViewportXStart, + unsigned int ViewportYStart, + bool GPUVMEnable, + bool HostVMEnable, + unsigned int HostVMMaxNonCachedPageTableLevels, + unsigned int GPUVMMaxPageTableLevels, + unsigned int GPUVMMinPageSizeKBytes, + unsigned int HostVMMinPageSize, + unsigned int PTEBufferSizeInRequests, + unsigned int Pitch, + unsigned int DCCMetaPitch, + unsigned int MacroTileWidth, + unsigned int MacroTileHeight, + + /* Output */ + unsigned int *MetaRowByte, + unsigned int *PixelPTEBytesPerRow, + unsigned int *dpte_row_width_ub, + unsigned int *dpte_row_height, + unsigned int *dpte_row_height_linear, + unsigned int *PixelPTEBytesPerRow_one_row_per_frame, + unsigned int *dpte_row_width_ub_one_row_per_frame, + unsigned int *dpte_row_height_one_row_per_frame, + unsigned int *MetaRequestWidth, + unsigned int *MetaRequestHeight, + unsigned int *meta_row_width, + unsigned int *meta_row_height, + unsigned int *PixelPTEReqWidth, + unsigned int *PixelPTEReqHeight, + unsigned int *PTERequestSize, + unsigned int *DPDE0BytesFrame, + unsigned int *MetaPTEBytesFrame) +{ + unsigned int MPDEBytesFrame; + unsigned int DCCMetaSurfaceBytes; + unsigned int ExtraDPDEBytesFrame; + unsigned int PDEAndMetaPTEBytesFrame; + unsigned int HostVMDynamicLevels = 0; + unsigned int MacroTileSizeBytes; + unsigned int vp_height_meta_ub; + unsigned int vp_height_dpte_ub; + unsigned int PixelPTEReqWidth_linear = 0; // VBA_DELTA. VBA doesn't calculate this + + if (GPUVMEnable == true && HostVMEnable == true) { + if (HostVMMinPageSize < 2048) + HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels; + else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) + HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1); + else + HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2); + } + + *MetaRequestHeight = 8 * BlockHeight256Bytes; + *MetaRequestWidth = 8 * BlockWidth256Bytes; + if (SurfaceTiling == dm_sw_linear) { + *meta_row_height = 32; + *meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1, *MetaRequestWidth) + - dml_floor(ViewportXStart, *MetaRequestWidth); + } else if (!IsVertical(SourceRotation)) { + *meta_row_height = *MetaRequestHeight; + if (ViewportStationary && NumberOfDPPs == 1) { + *meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1, + *MetaRequestWidth) - dml_floor(ViewportXStart, *MetaRequestWidth); + } else { + *meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth; + } + *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0; + } else { + *meta_row_height = *MetaRequestWidth; + if (ViewportStationary && NumberOfDPPs == 1) { + *meta_row_width = dml_floor(ViewportYStart + ViewportHeight + *MetaRequestHeight - 1, + *MetaRequestHeight) - dml_floor(ViewportYStart, *MetaRequestHeight); + } else { + *meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight; + } + *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0; + } + + if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) { + vp_height_meta_ub = dml_floor(ViewportYStart + ViewportHeight + 64 * BlockHeight256Bytes - 1, + 64 * BlockHeight256Bytes) - dml_floor(ViewportYStart, 64 * BlockHeight256Bytes); + } else if (!IsVertical(SourceRotation)) { + vp_height_meta_ub = dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes; + } else { + vp_height_meta_ub = dml_ceil(SwathWidth - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes; + } + + DCCMetaSurfaceBytes = DCCMetaPitch * vp_height_meta_ub * BytePerPixel / 256.0; + + if (GPUVMEnable == true) { + *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / + (8 * 4.0 * 1024), 1) + 1) * 64; + MPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 1); + } else { + *MetaPTEBytesFrame = 0; + MPDEBytesFrame = 0; + } + + if (DCCEnable != true) { + *MetaPTEBytesFrame = 0; + MPDEBytesFrame = 0; + *MetaRowByte = 0; + } + + MacroTileSizeBytes = MacroTileWidth * BytePerPixel * MacroTileHeight; + + if (GPUVMEnable == true && GPUVMMaxPageTableLevels > 1) { + if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) { + vp_height_dpte_ub = dml_floor(ViewportYStart + ViewportHeight + + MacroTileHeight - 1, MacroTileHeight) - + dml_floor(ViewportYStart, MacroTileHeight); + } else if (!IsVertical(SourceRotation)) { + vp_height_dpte_ub = dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight; + } else { + vp_height_dpte_ub = dml_ceil(SwathWidth - 1, MacroTileHeight) + MacroTileHeight; + } + *DPDE0BytesFrame = 64 * (dml_ceil((Pitch * vp_height_dpte_ub * BytePerPixel - MacroTileSizeBytes) / + (8 * 2097152), 1) + 1); + ExtraDPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 2); + } else { + *DPDE0BytesFrame = 0; + ExtraDPDEBytesFrame = 0; + vp_height_dpte_ub = 0; + } + + PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable); + dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable); + dml_print("DML::%s: SwModeLinear = %d\n", __func__, SurfaceTiling == dm_sw_linear); + dml_print("DML::%s: BytePerPixel = %d\n", __func__, BytePerPixel); + dml_print("DML::%s: GPUVMMaxPageTableLevels = %d\n", __func__, GPUVMMaxPageTableLevels); + dml_print("DML::%s: BlockHeight256Bytes = %d\n", __func__, BlockHeight256Bytes); + dml_print("DML::%s: BlockWidth256Bytes = %d\n", __func__, BlockWidth256Bytes); + dml_print("DML::%s: MacroTileHeight = %d\n", __func__, MacroTileHeight); + dml_print("DML::%s: MacroTileWidth = %d\n", __func__, MacroTileWidth); + dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame); + dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame); + dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame); + dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame); + dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); + dml_print("DML::%s: ViewportHeight = %d\n", __func__, ViewportHeight); + dml_print("DML::%s: SwathWidth = %d\n", __func__, SwathWidth); + dml_print("DML::%s: vp_height_dpte_ub = %d\n", __func__, vp_height_dpte_ub); +#endif + + if (HostVMEnable == true) + PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels); + + if (SurfaceTiling == dm_sw_linear) { + *PixelPTEReqHeight = 1; + *PixelPTEReqWidth = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel; + PixelPTEReqWidth_linear = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel; + *PTERequestSize = 64; + } else if (GPUVMMinPageSizeKBytes == 4) { + *PixelPTEReqHeight = 16 * BlockHeight256Bytes; + *PixelPTEReqWidth = 16 * BlockWidth256Bytes; + *PTERequestSize = 128; + } else { + *PixelPTEReqHeight = MacroTileHeight; + *PixelPTEReqWidth = 8 * 1024 * GPUVMMinPageSizeKBytes / (MacroTileHeight * BytePerPixel); + *PTERequestSize = 64; + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes); + dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d (after HostVM factor)\n", __func__, PDEAndMetaPTEBytesFrame); + dml_print("DML::%s: PixelPTEReqHeight = %d\n", __func__, *PixelPTEReqHeight); + dml_print("DML::%s: PixelPTEReqWidth = %d\n", __func__, *PixelPTEReqWidth); + dml_print("DML::%s: PixelPTEReqWidth_linear = %d\n", __func__, PixelPTEReqWidth_linear); + dml_print("DML::%s: PTERequestSize = %d\n", __func__, *PTERequestSize); + dml_print("DML::%s: Pitch = %d\n", __func__, Pitch); +#endif + + *dpte_row_height_one_row_per_frame = vp_height_dpte_ub; + *dpte_row_width_ub_one_row_per_frame = (dml_ceil(((double)Pitch * (double)*dpte_row_height_one_row_per_frame / + (double) *PixelPTEReqHeight - 1) / (double) *PixelPTEReqWidth, 1) + 1) * + (double) *PixelPTEReqWidth; + *PixelPTEBytesPerRow_one_row_per_frame = *dpte_row_width_ub_one_row_per_frame / *PixelPTEReqWidth * + *PTERequestSize; + + if (SurfaceTiling == dm_sw_linear) { + *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * + *PixelPTEReqWidth / Pitch), 1)); +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: dpte_row_height = %d (1)\n", __func__, + PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch); + dml_print("DML::%s: dpte_row_height = %f (2)\n", __func__, + dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch)); + dml_print("DML::%s: dpte_row_height = %f (3)\n", __func__, + dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1)); + dml_print("DML::%s: dpte_row_height = %d (4)\n", __func__, + 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * + *PixelPTEReqWidth / Pitch), 1)); + dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height); +#endif + *dpte_row_width_ub = dml_ceil(((double) Pitch * (double) *dpte_row_height - 1), + (double) *PixelPTEReqWidth) + *PixelPTEReqWidth; + *PixelPTEBytesPerRow = *dpte_row_width_ub / (double)*PixelPTEReqWidth * (double)*PTERequestSize; + + // VBA_DELTA, VBA doesn't have programming value for pte row height linear. + *dpte_row_height_linear = 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * + PixelPTEReqWidth_linear / Pitch), 1); + if (*dpte_row_height_linear > 128) + *dpte_row_height_linear = 128; + + } else if (!IsVertical(SourceRotation)) { + *dpte_row_height = *PixelPTEReqHeight; + + if (GPUVMMinPageSizeKBytes > 64) { + *dpte_row_width_ub = (dml_ceil((Pitch * *dpte_row_height / *PixelPTEReqHeight - 1) / + *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth; + } else if (ViewportStationary && (NumberOfDPPs == 1)) { + *dpte_row_width_ub = dml_floor(ViewportXStart + SwathWidth + + *PixelPTEReqWidth - 1, *PixelPTEReqWidth) - + dml_floor(ViewportXStart, *PixelPTEReqWidth); + } else { + *dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * + *PixelPTEReqWidth; + } + + *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize; + } else { + *dpte_row_height = dml_min(*PixelPTEReqWidth, MacroTileWidth); + + if (ViewportStationary && (NumberOfDPPs == 1)) { + *dpte_row_width_ub = dml_floor(ViewportYStart + ViewportHeight + *PixelPTEReqHeight - 1, + *PixelPTEReqHeight) - dml_floor(ViewportYStart, *PixelPTEReqHeight); + } else { + *dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) + * *PixelPTEReqHeight; + } + + *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize; + } + + if (GPUVMEnable != true) + *PixelPTEBytesPerRow = 0; + if (HostVMEnable == true) + *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels); + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes); + dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height); + dml_print("DML::%s: dpte_row_height_linear = %d\n", __func__, *dpte_row_height_linear); + dml_print("DML::%s: dpte_row_width_ub = %d\n", __func__, *dpte_row_width_ub); + dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, *PixelPTEBytesPerRow); + dml_print("DML::%s: PTEBufferSizeInRequests = %d\n", __func__, PTEBufferSizeInRequests); + dml_print("DML::%s: dpte_row_height_one_row_per_frame = %d\n", __func__, *dpte_row_height_one_row_per_frame); + dml_print("DML::%s: dpte_row_width_ub_one_row_per_frame = %d\n", + __func__, *dpte_row_width_ub_one_row_per_frame); + dml_print("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %d\n", + __func__, *PixelPTEBytesPerRow_one_row_per_frame); + dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", + *MetaPTEBytesFrame); +#endif + + return PDEAndMetaPTEBytesFrame; +} // CalculateVMAndRowBytes + +double dml32_CalculatePrefetchSourceLines( + double VRatio, + unsigned int VTaps, + bool Interlace, + bool ProgressiveToInterlaceUnitInOPP, + unsigned int SwathHeight, + enum dm_rotation_angle SourceRotation, + bool ViewportStationary, + double SwathWidth, + unsigned int ViewportHeight, + unsigned int ViewportXStart, + unsigned int ViewportYStart, + + /* Output */ + double *VInitPreFill, + unsigned int *MaxNumSwath) +{ + + unsigned int vp_start_rot; + unsigned int sw0_tmp; + unsigned int MaxPartialSwath; + double numLines; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: VRatio = %f\n", __func__, VRatio); + dml_print("DML::%s: VTaps = %d\n", __func__, VTaps); + dml_print("DML::%s: ViewportXStart = %d\n", __func__, ViewportXStart); + dml_print("DML::%s: ViewportYStart = %d\n", __func__, ViewportYStart); + dml_print("DML::%s: ViewportStationary = %d\n", __func__, ViewportStationary); + dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight); +#endif + if (ProgressiveToInterlaceUnitInOPP) + *VInitPreFill = dml_floor((VRatio + (double) VTaps + 1) / 2.0, 1); + else + *VInitPreFill = dml_floor((VRatio + (double) VTaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1); + + if (ViewportStationary) { + if (SourceRotation == dm_rotation_180 || SourceRotation == dm_rotation_180m) { + vp_start_rot = SwathHeight - + (((unsigned int) (ViewportYStart + ViewportHeight - 1) % SwathHeight) + 1); + } else if (SourceRotation == dm_rotation_270 || SourceRotation == dm_rotation_90m) { + vp_start_rot = ViewportXStart; + } else if (SourceRotation == dm_rotation_90 || SourceRotation == dm_rotation_270m) { + vp_start_rot = SwathHeight - + (((unsigned int)(ViewportYStart + SwathWidth - 1) % SwathHeight) + 1); + } else { + vp_start_rot = ViewportYStart; + } + sw0_tmp = SwathHeight - (vp_start_rot % SwathHeight); + if (sw0_tmp < *VInitPreFill) + *MaxNumSwath = dml_ceil((*VInitPreFill - sw0_tmp) / SwathHeight, 1) + 1; + else + *MaxNumSwath = 1; + MaxPartialSwath = dml_max(1, (unsigned int) (vp_start_rot + *VInitPreFill - 1) % SwathHeight); + } else { + *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1; + if (*VInitPreFill > 1) + MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill - 2) % SwathHeight); + else + MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight); + } + numLines = *MaxNumSwath * SwathHeight + MaxPartialSwath; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: vp_start_rot = %d\n", __func__, vp_start_rot); + dml_print("DML::%s: VInitPreFill = %d\n", __func__, *VInitPreFill); + dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath); + dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath); + dml_print("DML::%s: Prefetch source lines = %3.2f\n", __func__, numLines); +#endif + return numLines; + +} // CalculatePrefetchSourceLines + +void dml32_CalculateMALLUseForStaticScreen( + unsigned int NumberOfActiveSurfaces, + unsigned int MALLAllocatedForDCNFinal, + enum dm_use_mall_for_static_screen_mode *UseMALLForStaticScreen, + unsigned int SurfaceSizeInMALL[], + bool one_row_per_frame_fits_in_buffer[], + + /* output */ + bool UsesMALLForStaticScreen[]) +{ + unsigned int k; + unsigned int SurfaceToAddToMALL; + bool CanAddAnotherSurfaceToMALL; + unsigned int TotalSurfaceSizeInMALL; + + TotalSurfaceSizeInMALL = 0; + for (k = 0; k < NumberOfActiveSurfaces; ++k) { + UsesMALLForStaticScreen[k] = (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable); + if (UsesMALLForStaticScreen[k]) + TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k]; +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n", __func__, k, UsesMALLForStaticScreen[k]); + dml_print("DML::%s: k=%d, TotalSurfaceSizeInMALL = %d\n", __func__, k, TotalSurfaceSizeInMALL); +#endif + } + + SurfaceToAddToMALL = 0; + CanAddAnotherSurfaceToMALL = true; + while (CanAddAnotherSurfaceToMALL) { + CanAddAnotherSurfaceToMALL = false; + for (k = 0; k < NumberOfActiveSurfaces; ++k) { + if (TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k] <= MALLAllocatedForDCNFinal * 1024 * 1024 && + !UsesMALLForStaticScreen[k] && + UseMALLForStaticScreen[k] != dm_use_mall_static_screen_disable && + one_row_per_frame_fits_in_buffer[k] && + (!CanAddAnotherSurfaceToMALL || + SurfaceSizeInMALL[k] < SurfaceSizeInMALL[SurfaceToAddToMALL])) { + CanAddAnotherSurfaceToMALL = true; + SurfaceToAddToMALL = k; +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%d, UseMALLForStaticScreen = %d (dis, en, optimize)\n", + __func__, k, UseMALLForStaticScreen[k]); +#endif + } + } + if (CanAddAnotherSurfaceToMALL) { + UsesMALLForStaticScreen[SurfaceToAddToMALL] = true; + TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[SurfaceToAddToMALL]; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: SurfaceToAddToMALL = %d\n", __func__, SurfaceToAddToMALL); + dml_print("DML::%s: TotalSurfaceSizeInMALL = %d\n", __func__, TotalSurfaceSizeInMALL); +#endif + + } + } +} + +void dml32_CalculateRowBandwidth( + bool GPUVMEnable, + enum source_format_class SourcePixelFormat, + double VRatio, + double VRatioChroma, + bool DCCEnable, + double LineTime, + unsigned int MetaRowByteLuma, + unsigned int MetaRowByteChroma, + unsigned int meta_row_height_luma, + unsigned int meta_row_height_chroma, + unsigned int PixelPTEBytesPerRowLuma, + unsigned int PixelPTEBytesPerRowChroma, + unsigned int dpte_row_height_luma, + unsigned int dpte_row_height_chroma, + /* Output */ + double *meta_row_bw, + double *dpte_row_bw) +{ + if (DCCEnable != true) { + *meta_row_bw = 0; + } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || + SourcePixelFormat == dm_rgbe_alpha) { + *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma * + MetaRowByteChroma / (meta_row_height_chroma * LineTime); + } else { + *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime); + } + + if (GPUVMEnable != true) { + *dpte_row_bw = 0; + } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || + SourcePixelFormat == dm_rgbe_alpha) { + *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) + + VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime); + } else { + *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime); + } +} + +double dml32_CalculateUrgentLatency( + double UrgentLatencyPixelDataOnly, + double UrgentLatencyPixelMixedWithVMData, + double UrgentLatencyVMDataOnly, + bool DoUrgentLatencyAdjustment, + double UrgentLatencyAdjustmentFabricClockComponent, + double UrgentLatencyAdjustmentFabricClockReference, + double FabricClock) +{ + double ret; + + ret = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly); + if (DoUrgentLatencyAdjustment == true) { + ret = ret + UrgentLatencyAdjustmentFabricClockComponent * + (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1); + } + return ret; +} + +void dml32_CalculateUrgentBurstFactor( + enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange, + unsigned int swath_width_luma_ub, + unsigned int swath_width_chroma_ub, + unsigned int SwathHeightY, + unsigned int SwathHeightC, + double LineTime, + double UrgentLatency, + double CursorBufferSize, + unsigned int CursorWidth, + unsigned int CursorBPP, + double VRatio, + double VRatioC, + double BytePerPixelInDETY, + double BytePerPixelInDETC, + unsigned int DETBufferSizeY, + unsigned int DETBufferSizeC, + /* Output */ + double *UrgentBurstFactorCursor, + double *UrgentBurstFactorLuma, + double *UrgentBurstFactorChroma, + bool *NotEnoughUrgentLatencyHiding) +{ + double LinesInDETLuma; + double LinesInDETChroma; + unsigned int LinesInCursorBuffer; + double CursorBufferSizeInTime; + double DETBufferSizeInTimeLuma; + double DETBufferSizeInTimeChroma; + + *NotEnoughUrgentLatencyHiding = 0; + + if (CursorWidth > 0) { + LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 / + (CursorWidth * CursorBPP / 8.0)), 1.0); + if (VRatio > 0) { + CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio; + if (CursorBufferSizeInTime - UrgentLatency <= 0) { + *NotEnoughUrgentLatencyHiding = 1; + *UrgentBurstFactorCursor = 0; + } else { + *UrgentBurstFactorCursor = CursorBufferSizeInTime / + (CursorBufferSizeInTime - UrgentLatency); + } + } else { + *UrgentBurstFactorCursor = 1; + } + } + + LinesInDETLuma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ? 1024*1024 : + DETBufferSizeY) / BytePerPixelInDETY / swath_width_luma_ub; + + if (VRatio > 0) { + DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio; + if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) { + *NotEnoughUrgentLatencyHiding = 1; + *UrgentBurstFactorLuma = 0; + } else { + *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency); + } + } else { + *UrgentBurstFactorLuma = 1; + } + + if (BytePerPixelInDETC > 0) { + LinesInDETChroma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ? + 1024 * 1024 : DETBufferSizeC) / BytePerPixelInDETC + / swath_width_chroma_ub; + + if (VRatio > 0) { + DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatio; + if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) { + *NotEnoughUrgentLatencyHiding = 1; + *UrgentBurstFactorChroma = 0; + } else { + *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma + / (DETBufferSizeInTimeChroma - UrgentLatency); + } + } else { + *UrgentBurstFactorChroma = 1; + } + } +} // CalculateUrgentBurstFactor + +void dml32_CalculateDCFCLKDeepSleep( + unsigned int NumberOfActiveSurfaces, + unsigned int BytePerPixelY[], + unsigned int BytePerPixelC[], + double VRatio[], + double VRatioChroma[], + double SwathWidthY[], + double SwathWidthC[], + unsigned int DPPPerSurface[], + double HRatio[], + double HRatioChroma[], + double PixelClock[], + double PSCL_THROUGHPUT[], + double PSCL_THROUGHPUT_CHROMA[], + double Dppclk[], + double ReadBandwidthLuma[], + double ReadBandwidthChroma[], + unsigned int ReturnBusWidth, + + /* Output */ + double *DCFClkDeepSleep) +{ + unsigned int k; + double DisplayPipeLineDeliveryTimeLuma; + double DisplayPipeLineDeliveryTimeChroma; + double DCFClkDeepSleepPerSurface[DC__NUM_DPP__MAX]; + double ReadBandwidth = 0.0; + + for (k = 0; k < NumberOfActiveSurfaces; ++k) { + + if (VRatio[k] <= 1) { + DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerSurface[k] / HRatio[k] + / PixelClock[k]; + } else { + DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / Dppclk[k]; + } + if (BytePerPixelC[k] == 0) { + DisplayPipeLineDeliveryTimeChroma = 0; + } else { + if (VRatioChroma[k] <= 1) { + DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * + DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k]; + } else { + DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] + / Dppclk[k]; + } + } + + if (BytePerPixelC[k] > 0) { + DCFClkDeepSleepPerSurface[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * + BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma, + __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / + 32.0 / DisplayPipeLineDeliveryTimeChroma); + } else { + DCFClkDeepSleepPerSurface[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / + 64.0 / DisplayPipeLineDeliveryTimeLuma; + } + DCFClkDeepSleepPerSurface[k] = dml_max(DCFClkDeepSleepPerSurface[k], PixelClock[k] / 16); + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%d, PixelClock = %f\n", __func__, k, PixelClock[k]); + dml_print("DML::%s: k=%d, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]); +#endif + } + + for (k = 0; k < NumberOfActiveSurfaces; ++k) + ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k]; + + *DCFClkDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / (double) ReturnBusWidth); + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: __DML_MIN_DCFCLK_FACTOR__ = %f\n", __func__, __DML_MIN_DCFCLK_FACTOR__); + dml_print("DML::%s: ReadBandwidth = %f\n", __func__, ReadBandwidth); + dml_print("DML::%s: ReturnBusWidth = %d\n", __func__, ReturnBusWidth); + dml_print("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep); +#endif + + for (k = 0; k < NumberOfActiveSurfaces; ++k) + *DCFClkDeepSleep = dml_max(*DCFClkDeepSleep, DCFClkDeepSleepPerSurface[k]); +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep); +#endif +} // CalculateDCFCLKDeepSleep + +double dml32_CalculateWriteBackDelay( + enum source_format_class WritebackPixelFormat, + double WritebackHRatio, + double WritebackVRatio, + unsigned int WritebackVTaps, + unsigned int WritebackDestinationWidth, + unsigned int WritebackDestinationHeight, + unsigned int WritebackSourceHeight, + unsigned int HTotal) +{ + double CalculateWriteBackDelay; + double Line_length; + double Output_lines_last_notclamped; + double WritebackVInit; + + WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2; + Line_length = dml_max((double) WritebackDestinationWidth, + dml_ceil((double)WritebackDestinationWidth / 6.0, 1.0) * WritebackVTaps); + Output_lines_last_notclamped = WritebackDestinationHeight - 1 - + dml_ceil(((double)WritebackSourceHeight - + (double) WritebackVInit) / (double)WritebackVRatio, 1.0); + if (Output_lines_last_notclamped < 0) { + CalculateWriteBackDelay = 0; + } else { + CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length + + (HTotal - WritebackDestinationWidth) + 80; + } + return CalculateWriteBackDelay; +} + +void dml32_UseMinimumDCFCLK( + enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], + bool DRRDisplay[], + bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal, + unsigned int MaxInterDCNTileRepeaters, + unsigned int MaxPrefetchMode, + double DRAMClockChangeLatencyFinal, + double FCLKChangeLatency, + double SREnterPlusExitTime, + unsigned int ReturnBusWidth, + unsigned int RoundTripPingLatencyCycles, + unsigned int ReorderingBytes, + unsigned int PixelChunkSizeInKByte, + unsigned int MetaChunkSize, + bool GPUVMEnable, + unsigned int GPUVMMaxPageTableLevels, + bool HostVMEnable, + unsigned int NumberOfActiveSurfaces, + double HostVMMinPageSize, + unsigned int HostVMMaxNonCachedPageTableLevels, + bool DynamicMetadataVMEnabled, + bool ImmediateFlipRequirement, + bool ProgressiveToInterlaceUnitInOPP, + double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation, + double PercentOfIdealSDPPortBWReceivedAfterUrgLatency, + unsigned int VTotal[], + unsigned int VActive[], + unsigned int DynamicMetadataTransmittedBytes[], + unsigned int DynamicMetadataLinesBeforeActiveRequired[], + bool Interlace[], + double RequiredDPPCLKPerSurface[][2][DC__NUM_DPP__MAX], + double RequiredDISPCLK[][2], + double UrgLatency[], + unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX], + double ProjectedDCFClkDeepSleep[][2], + double MaximumVStartup[][2][DC__NUM_DPP__MAX], + unsigned int TotalNumberOfActiveDPP[][2], + unsigned int TotalNumberOfDCCActiveDPP[][2], + unsigned int dpte_group_bytes[], + double PrefetchLinesY[][2][DC__NUM_DPP__MAX], + double PrefetchLinesC[][2][DC__NUM_DPP__MAX], + unsigned int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX], + unsigned int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX], + unsigned int BytePerPixelY[], + unsigned int BytePerPixelC[], + unsigned int HTotal[], + double PixelClock[], + double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX], + double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX], + double MetaRowBytes[][2][DC__NUM_DPP__MAX], + bool DynamicMetadataEnable[], + double ReadBandwidthLuma[], + double ReadBandwidthChroma[], + double DCFCLKPerState[], + /* Output */ + double DCFCLKState[][2]) +{ + unsigned int i, j, k; + unsigned int dummy1; + double dummy2, dummy3; + double NormalEfficiency; + double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2]; + + NormalEfficiency = PercentOfIdealSDPPortBWReceivedAfterUrgLatency / 100.0; + for (i = 0; i < DC__VOLTAGE_STATES; ++i) { + for (j = 0; j <= 1; ++j) { + double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX]; + double PrefetchPixelLinesTime[DC__NUM_DPP__MAX]; + double DCFCLKRequiredForPeakBandwidthPerSurface[DC__NUM_DPP__MAX]; + double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX]; + double MinimumTWait = 0.0; + double DPTEBandwidth; + double DCFCLKRequiredForAverageBandwidth; + unsigned int ExtraLatencyBytes; + double ExtraLatencyCycles; + double DCFCLKRequiredForPeakBandwidth; + unsigned int NoOfDPPState[DC__NUM_DPP__MAX]; + double MinimumTvmPlus2Tr0; + + TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0; + for (k = 0; k < NumberOfActiveSurfaces; ++k) { + TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j] + + NoOfDPP[i][j][k] * DPTEBytesPerRow[i][j][k] + / (15.75 * HTotal[k] / PixelClock[k]); + } + + for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k) + NoOfDPPState[k] = NoOfDPP[i][j][k]; + + DPTEBandwidth = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]; + DCFCLKRequiredForAverageBandwidth = dml_max(ProjectedDCFClkDeepSleep[i][j], DPTEBandwidth / NormalEfficiency / ReturnBusWidth); + + ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(ReorderingBytes, + TotalNumberOfActiveDPP[i][j], PixelChunkSizeInKByte, + TotalNumberOfDCCActiveDPP[i][j], MetaChunkSize, GPUVMEnable, HostVMEnable, + NumberOfActiveSurfaces, NoOfDPPState, dpte_group_bytes, 1, HostVMMinPageSize, + HostVMMaxNonCachedPageTableLevels); + ExtraLatencyCycles = RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ + + ExtraLatencyBytes / NormalEfficiency / ReturnBusWidth; + for (k = 0; k < NumberOfActiveSurfaces; ++k) { + double DCFCLKCyclesRequiredInPrefetch; + double PrefetchTime; + + PixelDCFCLKCyclesRequiredInPrefetch[k] = (PrefetchLinesY[i][j][k] + * swath_width_luma_ub_all_states[i][j][k] * BytePerPixelY[k] + + PrefetchLinesC[i][j][k] * swath_width_chroma_ub_all_states[i][j][k] + * BytePerPixelC[k]) / NormalEfficiency + / ReturnBusWidth; + DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k] + + PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency + / NormalEfficiency / ReturnBusWidth + * (GPUVMMaxPageTableLevels > 2 ? 1 : 0) + + 2 * DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency + / ReturnBusWidth + + 2 * MetaRowBytes[i][j][k] / NormalEfficiency / ReturnBusWidth + + PixelDCFCLKCyclesRequiredInPrefetch[k]; + PrefetchPixelLinesTime[k] = dml_max(PrefetchLinesY[i][j][k], PrefetchLinesC[i][j][k]) + * HTotal[k] / PixelClock[k]; + DynamicMetadataVMExtraLatency[k] = (GPUVMEnable == true && + DynamicMetadataEnable[k] == true && DynamicMetadataVMEnabled == true) ? + UrgLatency[i] * GPUVMMaxPageTableLevels * + (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0; + + MinimumTWait = dml32_CalculateTWait(MaxPrefetchMode, + UseMALLForPStateChange[k], + SynchronizeDRRDisplaysForUCLKPStateChangeFinal, + DRRDisplay[k], + DRAMClockChangeLatencyFinal, + FCLKChangeLatency, + UrgLatency[i], + SREnterPlusExitTime); + + PrefetchTime = (MaximumVStartup[i][j][k] - 1) * HTotal[k] / PixelClock[k] - + MinimumTWait - UrgLatency[i] * + ((GPUVMMaxPageTableLevels <= 2 ? GPUVMMaxPageTableLevels : + GPUVMMaxPageTableLevels - 2) * (HostVMEnable == true ? + HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) - + DynamicMetadataVMExtraLatency[k]; + + if (PrefetchTime > 0) { + double ExpectedVRatioPrefetch; + + ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k] / (PrefetchTime * + PixelDCFCLKCyclesRequiredInPrefetch[k] / + DCFCLKCyclesRequiredInPrefetch); + DCFCLKRequiredForPeakBandwidthPerSurface[k] = NoOfDPPState[k] * + PixelDCFCLKCyclesRequiredInPrefetch[k] / + PrefetchPixelLinesTime[k] * + dml_max(1.0, ExpectedVRatioPrefetch) * + dml_max(1.0, ExpectedVRatioPrefetch / 4); + if (HostVMEnable == true || ImmediateFlipRequirement == true) { + DCFCLKRequiredForPeakBandwidthPerSurface[k] = + DCFCLKRequiredForPeakBandwidthPerSurface[k] + + NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency / + NormalEfficiency / ReturnBusWidth; + } + } else { + DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i]; + } + if (DynamicMetadataEnable[k] == true) { + double TSetupPipe; + double TdmbfPipe; + double TdmsksPipe; + double TdmecPipe; + double AllowedTimeForUrgentExtraLatency; + + dml32_CalculateVUpdateAndDynamicMetadataParameters( + MaxInterDCNTileRepeaters, + RequiredDPPCLKPerSurface[i][j][k], + RequiredDISPCLK[i][j], + ProjectedDCFClkDeepSleep[i][j], + PixelClock[k], + HTotal[k], + VTotal[k] - VActive[k], + DynamicMetadataTransmittedBytes[k], + DynamicMetadataLinesBeforeActiveRequired[k], + Interlace[k], + ProgressiveToInterlaceUnitInOPP, + + /* output */ + &TSetupPipe, + &TdmbfPipe, + &TdmecPipe, + &TdmsksPipe, + &dummy1, + &dummy2, + &dummy3); + AllowedTimeForUrgentExtraLatency = MaximumVStartup[i][j][k] * HTotal[k] / + PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe - + TdmecPipe - TdmsksPipe - DynamicMetadataVMExtraLatency[k]; + if (AllowedTimeForUrgentExtraLatency > 0) + DCFCLKRequiredForPeakBandwidthPerSurface[k] = + dml_max(DCFCLKRequiredForPeakBandwidthPerSurface[k], + ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency); + else + DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i]; + } + } + DCFCLKRequiredForPeakBandwidth = 0; + for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k) { + DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + + DCFCLKRequiredForPeakBandwidthPerSurface[k]; + } + MinimumTvmPlus2Tr0 = UrgLatency[i] * (GPUVMEnable == true ? + (HostVMEnable == true ? (GPUVMMaxPageTableLevels + 2) * + (HostVMMaxNonCachedPageTableLevels + 1) - 1 : GPUVMMaxPageTableLevels + 1) : 0); + for (k = 0; k < NumberOfActiveSurfaces; ++k) { + double MaximumTvmPlus2Tr0PlusTsw; + + MaximumTvmPlus2Tr0PlusTsw = (MaximumVStartup[i][j][k] - 2) * HTotal[k] / + PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k]; + if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) { + DCFCLKRequiredForPeakBandwidth = DCFCLKPerState[i]; + } else { + DCFCLKRequiredForPeakBandwidth = dml_max3(DCFCLKRequiredForPeakBandwidth, + 2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw - + MinimumTvmPlus2Tr0 - + PrefetchPixelLinesTime[k] / 4), + (2 * ExtraLatencyCycles + + PixelDCFCLKCyclesRequiredInPrefetch[k]) / + (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0)); + } + } + DCFCLKState[i][j] = dml_min(DCFCLKPerState[i], 1.05 * + dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth)); + } + } +} + +unsigned int dml32_CalculateExtraLatencyBytes(unsigned int ReorderingBytes, + unsigned int TotalNumberOfActiveDPP, + unsigned int PixelChunkSizeInKByte, + unsigned int TotalNumberOfDCCActiveDPP, + unsigned int MetaChunkSize, + bool GPUVMEnable, + bool HostVMEnable, + unsigned int NumberOfActiveSurfaces, + unsigned int NumberOfDPP[], + unsigned int dpte_group_bytes[], + double HostVMInefficiencyFactor, + double HostVMMinPageSize, + unsigned int HostVMMaxNonCachedPageTableLevels) +{ + unsigned int k; + double ret; + unsigned int HostVMDynamicLevels; + + if (GPUVMEnable == true && HostVMEnable == true) { + if (HostVMMinPageSize < 2048) + HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels; + else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) + HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1); + else + HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2); + } else { + HostVMDynamicLevels = 0; + } + + ret = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0; + + if (GPUVMEnable == true) { + for (k = 0; k < NumberOfActiveSurfaces; ++k) { + ret = ret + NumberOfDPP[k] * dpte_group_bytes[k] * + (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor; + } + } + return ret; +} + +void dml32_CalculateVUpdateAndDynamicMetadataParameters( + unsigned int MaxInterDCNTileRepeaters, + double Dppclk, + double Dispclk, + double DCFClkDeepSleep, + double PixelClock, + unsigned int HTotal, + unsigned int VBlank, + unsigned int DynamicMetadataTransmittedBytes, + unsigned int DynamicMetadataLinesBeforeActiveRequired, + unsigned int InterlaceEnable, + bool ProgressiveToInterlaceUnitInOPP, + + /* output */ + double *TSetup, + double *Tdmbf, + double *Tdmec, + double *Tdmsks, + unsigned int *VUpdateOffsetPix, + double *VUpdateWidthPix, + double *VReadyOffsetPix) +{ + double TotalRepeaterDelayTime; + + TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk); + *VUpdateWidthPix = + dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / Dppclk + TotalRepeaterDelayTime) * PixelClock, 1.0); + *VReadyOffsetPix = dml_ceil(dml_max(150.0 / Dppclk, + TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / Dppclk) * PixelClock, 1.0); + *VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1.0); + *TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock; + *Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / Dispclk; + *Tdmec = HTotal / PixelClock; + + if (DynamicMetadataLinesBeforeActiveRequired == 0) + *Tdmsks = VBlank * HTotal / PixelClock / 2.0; + else + *Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock; + + if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false) + *Tdmsks = *Tdmsks / 2; +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: VUpdateWidthPix = %d\n", __func__, *VUpdateWidthPix); + dml_print("DML::%s: VReadyOffsetPix = %d\n", __func__, *VReadyOffsetPix); + dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix); + + dml_print("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %d\n", + __func__, DynamicMetadataLinesBeforeActiveRequired); + dml_print("DML::%s: VBlank = %d\n", __func__, VBlank); + dml_print("DML::%s: HTotal = %d\n", __func__, HTotal); + dml_print("DML::%s: PixelClock = %f\n", __func__, PixelClock); + dml_print("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks); +#endif +} + +double dml32_CalculateTWait( + unsigned int PrefetchMode, + enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange, + bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal, + bool DRRDisplay, + double DRAMClockChangeLatency, + double FCLKChangeLatency, + double UrgentLatency, + double SREnterPlusExitTime) +{ + double TWait = 0.0; + + if (PrefetchMode == 0 && + !(UseMALLForPStateChange == dm_use_mall_pstate_change_full_frame) && + !(UseMALLForPStateChange == dm_use_mall_pstate_change_sub_viewport) && + !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe) && + !(SynchronizeDRRDisplaysForUCLKPStateChangeFinal && DRRDisplay)) { + TWait = dml_max3(DRAMClockChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency); + } else if (PrefetchMode <= 1 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) { + TWait = dml_max3(FCLKChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency); + } else if (PrefetchMode <= 2 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) { + TWait = dml_max(SREnterPlusExitTime, UrgentLatency); + } else { + TWait = UrgentLatency; + } + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: PrefetchMode = %d\n", __func__, PrefetchMode); + dml_print("DML::%s: TWait = %f\n", __func__, TWait); +#endif + return TWait; +} // CalculateTWait + +// Function: get_return_bw_mbps +// Megabyte per second +double dml32_get_return_bw_mbps(const soc_bounding_box_st *soc, + const int VoltageLevel, + const bool HostVMEnable, + const double DCFCLK, + const double FabricClock, + const double DRAMSpeed) +{ + double ReturnBW = 0.; + double IdealSDPPortBandwidth = soc->return_bus_width_bytes /*mode_lib->vba.ReturnBusWidth*/ * DCFCLK; + double IdealFabricBandwidth = FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes; + double IdealDRAMBandwidth = DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes; + double PixelDataOnlyReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100, + IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100, + IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe : + soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100); + double PixelMixedWithVMDataReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100, + IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100, + IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe : + soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100); + + if (HostVMEnable != true) + ReturnBW = PixelDataOnlyReturnBW; + else + ReturnBW = PixelMixedWithVMDataReturnBW; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel); + dml_print("DML::%s: HostVMEnable = %d\n", __func__, HostVMEnable); + dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK); + dml_print("DML::%s: FabricClock = %f\n", __func__, FabricClock); + dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed); + dml_print("DML::%s: IdealSDPPortBandwidth = %f\n", __func__, IdealSDPPortBandwidth); + dml_print("DML::%s: IdealFabricBandwidth = %f\n", __func__, IdealFabricBandwidth); + dml_print("DML::%s: IdealDRAMBandwidth = %f\n", __func__, IdealDRAMBandwidth); + dml_print("DML::%s: PixelDataOnlyReturnBW = %f\n", __func__, PixelDataOnlyReturnBW); + dml_print("DML::%s: PixelMixedWithVMDataReturnBW = %f\n", __func__, PixelMixedWithVMDataReturnBW); + dml_print("DML::%s: ReturnBW = %f MBps\n", __func__, ReturnBW); +#endif + return ReturnBW; +} + +// Function: get_return_bw_mbps_vm_only +// Megabyte per second +double dml32_get_return_bw_mbps_vm_only(const soc_bounding_box_st *soc, + const int VoltageLevel, + const double DCFCLK, + const double FabricClock, + const double DRAMSpeed) +{ + double VMDataOnlyReturnBW = dml_min3( + soc->return_bus_width_bytes * DCFCLK * soc->pct_ideal_sdp_bw_after_urgent / 100.0, + FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes + * soc->pct_ideal_sdp_bw_after_urgent / 100.0, + DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes + * (VoltageLevel < 2 ? + soc->pct_ideal_dram_bw_after_urgent_strobe : + soc->pct_ideal_dram_sdp_bw_after_urgent_vm_only) / 100.0); +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel); + dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK); + dml_print("DML::%s: FabricClock = %f\n", __func__, FabricClock); + dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed); + dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW); +#endif + return VMDataOnlyReturnBW; +} + +double dml32_CalculateExtraLatency( + unsigned int RoundTripPingLatencyCycles, + unsigned int ReorderingBytes, + double DCFCLK, + unsigned int TotalNumberOfActiveDPP, + unsigned int PixelChunkSizeInKByte, + unsigned int TotalNumberOfDCCActiveDPP, + unsigned int MetaChunkSize, + double ReturnBW, + bool GPUVMEnable, + bool HostVMEnable, + unsigned int NumberOfActiveSurfaces, + unsigned int NumberOfDPP[], + unsigned int dpte_group_bytes[], + double HostVMInefficiencyFactor, + double HostVMMinPageSize, + unsigned int HostVMMaxNonCachedPageTableLevels) +{ + double ExtraLatencyBytes; + double ExtraLatency; + + ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes( + ReorderingBytes, + TotalNumberOfActiveDPP, + PixelChunkSizeInKByte, + TotalNumberOfDCCActiveDPP, + MetaChunkSize, + GPUVMEnable, + HostVMEnable, + NumberOfActiveSurfaces, + NumberOfDPP, + dpte_group_bytes, + HostVMInefficiencyFactor, + HostVMMinPageSize, + HostVMMaxNonCachedPageTableLevels); + + ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles); + dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK); + dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes); + dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW); + dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency); +#endif + + return ExtraLatency; +} // CalculateExtraLatency + +bool dml32_CalculatePrefetchSchedule( + struct vba_vars_st *v, + unsigned int k, + double HostVMInefficiencyFactor, + DmlPipe *myPipe, + unsigned int DSCDelay, + unsigned int DPP_RECOUT_WIDTH, + unsigned int VStartup, + unsigned int MaxVStartup, + double UrgentLatency, + double UrgentExtraLatency, + double TCalc, + unsigned int PDEAndMetaPTEBytesFrame, + unsigned int MetaRowByte, + unsigned int PixelPTEBytesPerRow, + double PrefetchSourceLinesY, + unsigned int SwathWidthY, + unsigned int VInitPreFillY, + unsigned int MaxNumSwathY, + double PrefetchSourceLinesC, + unsigned int SwathWidthC, + unsigned int VInitPreFillC, + unsigned int MaxNumSwathC, + unsigned int swath_width_luma_ub, + unsigned int swath_width_chroma_ub, + unsigned int SwathHeightY, + unsigned int SwathHeightC, + double TWait, + double TPreReq, + /* Output */ + double *DSTXAfterScaler, + double *DSTYAfterScaler, + double *DestinationLinesForPrefetch, + double *PrefetchBandwidth, + double *DestinationLinesToRequestVMInVBlank, + double *DestinationLinesToRequestRowInVBlank, + double *VRatioPrefetchY, + double *VRatioPrefetchC, + double *RequiredPrefetchPixDataBWLuma, + double *RequiredPrefetchPixDataBWChroma, + bool *NotEnoughTimeForDynamicMetadata, + double *Tno_bw, + double *prefetch_vmrow_bw, + double *Tdmdl_vm, + double *Tdmdl, + double *TSetup, + unsigned int *VUpdateOffsetPix, + double *VUpdateWidthPix, + double *VReadyOffsetPix) +{ + double DPPCLKDelaySubtotalPlusCNVCFormater = v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater; + bool MyError = false; + unsigned int DPPCycles, DISPCLKCycles; + double DSTTotalPixelsAfterScaler; + double LineTime; + double dst_y_prefetch_equ; + double prefetch_bw_oto; + double Tvm_oto; + double Tr0_oto; + double Tvm_oto_lines; + double Tr0_oto_lines; + double dst_y_prefetch_oto; + double TimeForFetchingMetaPTE = 0; + double TimeForFetchingRowInVBlank = 0; + double LinesToRequestPrefetchPixelData = 0; + double LinesForPrefetchBandwidth = 0; + unsigned int HostVMDynamicLevelsTrips; + double trip_to_mem; + double Tvm_trips; + double Tr0_trips; + double Tvm_trips_rounded; + double Tr0_trips_rounded; + double Lsw_oto; + double Tpre_rounded; + double prefetch_bw_equ; + double Tvm_equ; + double Tr0_equ; + double Tdmbf; + double Tdmec; + double Tdmsks; + double prefetch_sw_bytes; + double bytes_pp; + double dep_bytes; + unsigned int max_vratio_pre = __DML_MAX_VRATIO_PRE__; + double min_Lsw; + double Tsw_est1 = 0; + double Tsw_est3 = 0; + + if (v->GPUVMEnable == true && v->HostVMEnable == true) + HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels; + else + HostVMDynamicLevelsTrips = 0; +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: v->GPUVMEnable = %d\n", __func__, v->GPUVMEnable); + dml_print("DML::%s: v->GPUVMMaxPageTableLevels = %d\n", __func__, v->GPUVMMaxPageTableLevels); + dml_print("DML::%s: DCCEnable = %d\n", __func__, myPipe->DCCEnable); + dml_print("DML::%s: v->HostVMEnable=%d HostVMInefficiencyFactor=%f\n", + __func__, v->HostVMEnable, HostVMInefficiencyFactor); +#endif + dml32_CalculateVUpdateAndDynamicMetadataParameters( + v->MaxInterDCNTileRepeaters, + myPipe->Dppclk, + myPipe->Dispclk, + myPipe->DCFClkDeepSleep, + myPipe->PixelClock, + myPipe->HTotal, + myPipe->VBlank, + v->DynamicMetadataTransmittedBytes[k], + v->DynamicMetadataLinesBeforeActiveRequired[k], + myPipe->InterlaceEnable, + myPipe->ProgressiveToInterlaceUnitInOPP, + TSetup, + + /* output */ + &Tdmbf, + &Tdmec, + &Tdmsks, + VUpdateOffsetPix, + VUpdateWidthPix, + VReadyOffsetPix); + + LineTime = myPipe->HTotal / myPipe->PixelClock; + trip_to_mem = UrgentLatency; + Tvm_trips = UrgentExtraLatency + trip_to_mem * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1); + + if (v->DynamicMetadataVMEnabled == true) + *Tdmdl = TWait + Tvm_trips + trip_to_mem; + else + *Tdmdl = TWait + UrgentExtraLatency; + +#ifdef __DML_VBA_ALLOW_DELTA__ + if (v->DynamicMetadataEnable[k] == false) + *Tdmdl = 0.0; +#endif + + if (v->DynamicMetadataEnable[k] == true) { + if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) { + *NotEnoughTimeForDynamicMetadata = true; +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__); + dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", + __func__, Tdmbf); + dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec); + dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", + __func__, Tdmsks); + dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", + __func__, *Tdmdl); +#endif + } else { + *NotEnoughTimeForDynamicMetadata = false; + } + } else { + *NotEnoughTimeForDynamicMetadata = false; + } + + *Tdmdl_vm = (v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true && + v->GPUVMEnable == true ? TWait + Tvm_trips : 0); + + if (myPipe->ScalerEnabled) + DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCL; + else + DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCLLBOnly; + + DPPCycles = DPPCycles + myPipe->NumberOfCursors * v->DPPCLKDelayCNVCCursor; + + DISPCLKCycles = v->DISPCLKDelaySubtotal; + + if (myPipe->Dppclk == 0.0 || myPipe->Dispclk == 0.0) + return true; + + *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->Dppclk + DISPCLKCycles * + myPipe->PixelClock / myPipe->Dispclk + DSCDelay; + + *DSTXAfterScaler = *DSTXAfterScaler + (myPipe->ODMMode != dm_odm_combine_mode_disabled ? 18 : 0) + + (myPipe->DPPPerSurface - 1) * DPP_RECOUT_WIDTH + + ((myPipe->ODMMode == dm_odm_split_mode_1to2 || myPipe->ODMMode == dm_odm_mode_mso_1to2) ? + myPipe->HActive / 2 : 0) + + ((myPipe->ODMMode == dm_odm_mode_mso_1to4) ? myPipe->HActive * 3 / 4 : 0); + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles); + dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock); + dml_print("DML::%s: Dppclk: %f\n", __func__, myPipe->Dppclk); + dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles); + dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->Dispclk); + dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay); + dml_print("DML::%s: ODMMode: %d\n", __func__, myPipe->ODMMode); + dml_print("DML::%s: DPP_RECOUT_WIDTH: %d\n", __func__, DPP_RECOUT_WIDTH); + dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__, *DSTXAfterScaler); +#endif + + if (v->OutputFormat[k] == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP)) + *DSTYAfterScaler = 1; + else + *DSTYAfterScaler = 0; + + DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler; + *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1); + *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal)); +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler); + dml_print("DML::%s: DSTYAfterScaler: %d (final)\n", __func__, *DSTYAfterScaler); +#endif + + MyError = false; + + Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1); + + if (v->GPUVMEnable == true) { + Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1.0) / 4.0 * LineTime; + Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime; + if (v->GPUVMMaxPageTableLevels >= 3) { + *Tno_bw = UrgentExtraLatency + trip_to_mem * + (double) ((v->GPUVMMaxPageTableLevels - 2) * (HostVMDynamicLevelsTrips + 1) - 1); + } else if (v->GPUVMMaxPageTableLevels == 1 && myPipe->DCCEnable != true) { + Tr0_trips_rounded = dml_ceil(4.0 * UrgentExtraLatency / LineTime, 1.0) / + 4.0 * LineTime; // VBA_ERROR + *Tno_bw = UrgentExtraLatency; + } else { + *Tno_bw = 0; + } + } else if (myPipe->DCCEnable == true) { + Tvm_trips_rounded = LineTime / 4.0; + Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime; + *Tno_bw = 0; + } else { + Tvm_trips_rounded = LineTime / 4.0; + Tr0_trips_rounded = LineTime / 2.0; + *Tno_bw = 0; + } + Tvm_trips_rounded = dml_max(Tvm_trips_rounded, LineTime / 4.0); + Tr0_trips_rounded = dml_max(Tr0_trips_rounded, LineTime / 4.0); + + if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10 + || myPipe->SourcePixelFormat == dm_420_12) { + bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4; + } else { + bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC; + } + + prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC; + prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerSurface, + prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime)); + + min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre; + min_Lsw = dml_max(min_Lsw, 1.0); + Lsw_oto = dml_ceil(4.0 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1.0) / 4.0; + + if (v->GPUVMEnable == true) { + Tvm_oto = dml_max3( + Tvm_trips, + *Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, + LineTime / 4.0); + } else + Tvm_oto = LineTime / 4.0; + + if ((v->GPUVMEnable == true || myPipe->DCCEnable == true)) { + Tr0_oto = dml_max4( + Tr0_trips, + (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, + (LineTime - Tvm_oto)/2.0, + LineTime / 4.0); +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: Tr0_oto max0 = %f\n", __func__, + (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto); + dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, Tr0_trips); + dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, LineTime - Tvm_oto); + dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, LineTime / 4); +#endif + } else + Tr0_oto = (LineTime - Tvm_oto) / 2.0; + + Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0; + Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0; + dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto; + + dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - + (*DSTYAfterScaler + (double) *DSTXAfterScaler / (double) myPipe->HTotal); + + dst_y_prefetch_equ = dml_min(dst_y_prefetch_equ, __DML_VBA_MAX_DST_Y_PRE__); +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: HTotal = %d\n", __func__, myPipe->HTotal); + dml_print("DML::%s: min_Lsw = %f\n", __func__, min_Lsw); + dml_print("DML::%s: *Tno_bw = %f\n", __func__, *Tno_bw); + dml_print("DML::%s: UrgentExtraLatency = %f\n", __func__, UrgentExtraLatency); + dml_print("DML::%s: trip_to_mem = %f\n", __func__, trip_to_mem); + dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY); + dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); + dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub); + dml_print("DML::%s: BytePerPixelC = %d\n", __func__, myPipe->BytePerPixelC); + dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC); + dml_print("DML::%s: swath_width_chroma_ub = %d\n", __func__, swath_width_chroma_ub); + dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, prefetch_sw_bytes); + dml_print("DML::%s: bytes_pp = %f\n", __func__, bytes_pp); + dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); + dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte); + dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow); + dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); + dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips); + dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips); + dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto); + dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto); + dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto); + dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, Tvm_oto_lines); + dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, Tr0_oto_lines); + dml_print("DML::%s: Lsw_oto = %f\n", __func__, Lsw_oto); + dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, dst_y_prefetch_oto); + dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, dst_y_prefetch_equ); +#endif + + dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0; + Tpre_rounded = dst_y_prefetch_equ * LineTime; +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, dst_y_prefetch_equ); + dml_print("DML::%s: LineTime: %f\n", __func__, LineTime); + dml_print("DML::%s: VStartup: %d\n", __func__, VStartup); + dml_print("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n", + __func__, VStartup * LineTime); + dml_print("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *TSetup); + dml_print("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, TCalc); + dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf); + dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec); + dml_print("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd\n", __func__, *Tdmdl_vm); + dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl); + dml_print("DML::%s: DSTYAfterScaler: %d lines - number of lines of pipeline and buffer delay after scaler\n", + __func__, *DSTYAfterScaler); +#endif + dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, + MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor); + + if (prefetch_sw_bytes < dep_bytes) + prefetch_sw_bytes = 2 * dep_bytes; + + *PrefetchBandwidth = 0; + *DestinationLinesToRequestVMInVBlank = 0; + *DestinationLinesToRequestRowInVBlank = 0; + *VRatioPrefetchY = 0; + *VRatioPrefetchC = 0; + *RequiredPrefetchPixDataBWLuma = 0; + if (dst_y_prefetch_equ > 1 && + (Tpre_rounded >= TPreReq || dst_y_prefetch_equ == __DML_VBA_MAX_DST_Y_PRE__)) { + double PrefetchBandwidth1; + double PrefetchBandwidth2; + double PrefetchBandwidth3; + double PrefetchBandwidth4; + + if (Tpre_rounded - *Tno_bw > 0) { + PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor + + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw); + Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1; + } else + PrefetchBandwidth1 = 0; + + if (VStartup == MaxVStartup && (Tsw_est1 / LineTime < min_Lsw) + && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) { + PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) + / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw); + } + + if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0) + PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) / + (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded); + else + PrefetchBandwidth2 = 0; + + if (Tpre_rounded - Tvm_trips_rounded > 0) { + PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor + + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded); + Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3; + } else + PrefetchBandwidth3 = 0; + + + if (VStartup == MaxVStartup && + (Tsw_est3 / LineTime < min_Lsw) && Tpre_rounded - min_Lsw * LineTime - 0.75 * + LineTime - Tvm_trips_rounded > 0) { + PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) + / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded); + } + + if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0) { + PrefetchBandwidth4 = prefetch_sw_bytes / + (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded); + } else { + PrefetchBandwidth4 = 0; + } + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded); + dml_print("DML::%s: Tno_bw: %f\n", __func__, *Tno_bw); + dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded); + dml_print("DML::%s: Tsw_est1: %f\n", __func__, Tsw_est1); + dml_print("DML::%s: Tsw_est3: %f\n", __func__, Tsw_est3); + dml_print("DML::%s: PrefetchBandwidth1: %f\n", __func__, PrefetchBandwidth1); + dml_print("DML::%s: PrefetchBandwidth2: %f\n", __func__, PrefetchBandwidth2); + dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3); + dml_print("DML::%s: PrefetchBandwidth4: %f\n", __func__, PrefetchBandwidth4); +#endif + { + bool Case1OK; + bool Case2OK; + bool Case3OK; + + if (PrefetchBandwidth1 > 0) { + if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 + >= Tvm_trips_rounded + && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) + / PrefetchBandwidth1 >= Tr0_trips_rounded) { + Case1OK = true; + } else { + Case1OK = false; + } + } else { + Case1OK = false; + } + + if (PrefetchBandwidth2 > 0) { + if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 + >= Tvm_trips_rounded + && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) + / PrefetchBandwidth2 < Tr0_trips_rounded) { + Case2OK = true; + } else { + Case2OK = false; + } + } else { + Case2OK = false; + } + + if (PrefetchBandwidth3 > 0) { + if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < + Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * + HostVMInefficiencyFactor) / PrefetchBandwidth3 >= + Tr0_trips_rounded) { + Case3OK = true; + } else { + Case3OK = false; + } + } else { + Case3OK = false; + } + + if (Case1OK) + prefetch_bw_equ = PrefetchBandwidth1; + else if (Case2OK) + prefetch_bw_equ = PrefetchBandwidth2; + else if (Case3OK) + prefetch_bw_equ = PrefetchBandwidth3; + else + prefetch_bw_equ = PrefetchBandwidth4; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK); + dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK); + dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK); + dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ); +#endif + + if (prefetch_bw_equ > 0) { + if (v->GPUVMEnable == true) { + Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * + HostVMInefficiencyFactor / prefetch_bw_equ, + Tvm_trips, LineTime / 4); + } else { + Tvm_equ = LineTime / 4; + } + + if ((v->GPUVMEnable == true || myPipe->DCCEnable == true)) { + Tr0_equ = dml_max4((MetaRowByte + PixelPTEBytesPerRow * + HostVMInefficiencyFactor) / prefetch_bw_equ, Tr0_trips, + (LineTime - Tvm_equ) / 2, LineTime / 4); + } else { + Tr0_equ = (LineTime - Tvm_equ) / 2; + } + } else { + Tvm_equ = 0; + Tr0_equ = 0; +#ifdef __DML_VBA_DEBUG__ + dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__); +#endif + } + } + + if (dst_y_prefetch_oto < dst_y_prefetch_equ) { + if (dst_y_prefetch_oto * LineTime < TPreReq) { + *DestinationLinesForPrefetch = dst_y_prefetch_equ; + } else { + *DestinationLinesForPrefetch = dst_y_prefetch_oto; + } + TimeForFetchingMetaPTE = Tvm_oto; + TimeForFetchingRowInVBlank = Tr0_oto; + *PrefetchBandwidth = prefetch_bw_oto; + /* Clamp to oto for bandwidth calculation */ + LinesForPrefetchBandwidth = dst_y_prefetch_oto; + } else { + *DestinationLinesForPrefetch = dst_y_prefetch_equ; + TimeForFetchingMetaPTE = Tvm_equ; + TimeForFetchingRowInVBlank = Tr0_equ; + *PrefetchBandwidth = prefetch_bw_equ; + /* Clamp to equ for bandwidth calculation */ + LinesForPrefetchBandwidth = dst_y_prefetch_equ; + } + + *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0; + + *DestinationLinesToRequestRowInVBlank = + dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0; + + LinesToRequestPrefetchPixelData = LinesForPrefetchBandwidth - + *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch); + dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", + __func__, *DestinationLinesToRequestVMInVBlank); + dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank); + dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); + dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", + __func__, *DestinationLinesToRequestRowInVBlank); + dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); + dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData); +#endif + + if (LinesToRequestPrefetchPixelData >= 1 && prefetch_bw_equ > 0) { + *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData; + *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY); + dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY); + dml_print("DML::%s: VInitPreFillY = %d\n", __func__, VInitPreFillY); +#endif + if ((SwathHeightY > 4) && (VInitPreFillY > 3)) { + if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) { + *VRatioPrefetchY = + dml_max((double) PrefetchSourceLinesY / + LinesToRequestPrefetchPixelData, + (double) MaxNumSwathY * SwathHeightY / + (LinesToRequestPrefetchPixelData - + (VInitPreFillY - 3.0) / 2.0)); + *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); + } else { + MyError = true; + *VRatioPrefetchY = 0; + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY); + dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); + dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY); +#endif + } + + *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData; + *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC); + dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC); + dml_print("DML::%s: VInitPreFillC = %d\n", __func__, VInitPreFillC); +#endif + if ((SwathHeightC > 4)) { + if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) { + *VRatioPrefetchC = + dml_max(*VRatioPrefetchC, + (double) MaxNumSwathC * SwathHeightC / + (LinesToRequestPrefetchPixelData - + (VInitPreFillC - 3.0) / 2.0)); + *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); + } else { + MyError = true; + *VRatioPrefetchC = 0; + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC); + dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC); + dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC); +#endif + } + + *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY + / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub + / LineTime; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY); + dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub); + dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); + dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", + __func__, *RequiredPrefetchPixDataBWLuma); +#endif + *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / + LinesToRequestPrefetchPixelData + * myPipe->BytePerPixelC + * swath_width_chroma_ub / LineTime; + } else { + MyError = true; +#ifdef __DML_VBA_DEBUG__ + dml_print("DML:%s: MyErr set. LinesToRequestPrefetchPixelData: %f, should be > 0\n", + __func__, LinesToRequestPrefetchPixelData); +#endif + *VRatioPrefetchY = 0; + *VRatioPrefetchC = 0; + *RequiredPrefetchPixDataBWLuma = 0; + *RequiredPrefetchPixDataBWChroma = 0; + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n", + (double)LinesToRequestPrefetchPixelData * LineTime + + 2.0*TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE); + dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE); + dml_print("DML: To: %fus - time for propagation from scaler to optc\n", + (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime); + dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n"); + dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * LineTime - + TimeForFetchingMetaPTE - 2*TimeForFetchingRowInVBlank - (*DSTYAfterScaler + + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup); + dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", + PixelPTEBytesPerRow); +#endif + } else { + MyError = true; +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n", + __func__, dst_y_prefetch_equ); +#endif + } + + { + double prefetch_vm_bw; + double prefetch_row_bw; + + if (PDEAndMetaPTEBytesFrame == 0) { + prefetch_vm_bw = 0; + } else if (*DestinationLinesToRequestVMInVBlank > 0) { +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); + dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); + dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", + __func__, *DestinationLinesToRequestVMInVBlank); + dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); +#endif + prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / + (*DestinationLinesToRequestVMInVBlank * LineTime); +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw); +#endif + } else { + prefetch_vm_bw = 0; + MyError = true; +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: MyErr set. DestinationLinesToRequestVMInVBlank=%f (should be > 0)\n", + __func__, *DestinationLinesToRequestVMInVBlank); +#endif + } + + if (MetaRowByte + PixelPTEBytesPerRow == 0) { + prefetch_row_bw = 0; + } else if (*DestinationLinesToRequestRowInVBlank > 0) { + prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / + (*DestinationLinesToRequestRowInVBlank * LineTime); + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte); + dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow); + dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", + __func__, *DestinationLinesToRequestRowInVBlank); + dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw); +#endif + } else { + prefetch_row_bw = 0; + MyError = true; +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: MyErr set. DestinationLinesToRequestRowInVBlank=%f (should be > 0)\n", + __func__, *DestinationLinesToRequestRowInVBlank); +#endif + } + + *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw); + } + + if (MyError) { + *PrefetchBandwidth = 0; + TimeForFetchingMetaPTE = 0; + TimeForFetchingRowInVBlank = 0; + *DestinationLinesToRequestVMInVBlank = 0; + *DestinationLinesToRequestRowInVBlank = 0; + *DestinationLinesForPrefetch = 0; + LinesToRequestPrefetchPixelData = 0; + *VRatioPrefetchY = 0; + *VRatioPrefetchC = 0; + *RequiredPrefetchPixDataBWLuma = 0; + *RequiredPrefetchPixDataBWChroma = 0; + } + + return MyError; +} // CalculatePrefetchSchedule + +void dml32_CalculateFlipSchedule( + double HostVMInefficiencyFactor, + double UrgentExtraLatency, + double UrgentLatency, + unsigned int GPUVMMaxPageTableLevels, + bool HostVMEnable, + unsigned int HostVMMaxNonCachedPageTableLevels, + bool GPUVMEnable, + double HostVMMinPageSize, + double PDEAndMetaPTEBytesPerFrame, + double MetaRowBytes, + double DPTEBytesPerRow, + double BandwidthAvailableForImmediateFlip, + unsigned int TotImmediateFlipBytes, + enum source_format_class SourcePixelFormat, + double LineTime, + double VRatio, + double VRatioChroma, + double Tno_bw, + bool DCCEnable, + unsigned int dpte_row_height, + unsigned int meta_row_height, + unsigned int dpte_row_height_chroma, + unsigned int meta_row_height_chroma, + bool use_one_row_for_frame_flip, + + /* Output */ + double *DestinationLinesToRequestVMInImmediateFlip, + double *DestinationLinesToRequestRowInImmediateFlip, + double *final_flip_bw, + bool *ImmediateFlipSupportedForPipe) +{ + double min_row_time = 0.0; + unsigned int HostVMDynamicLevelsTrips; + double TimeForFetchingMetaPTEImmediateFlip; + double TimeForFetchingRowInVBlankImmediateFlip; + double ImmediateFlipBW; + + if (GPUVMEnable == true && HostVMEnable == true) + HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels; + else + HostVMDynamicLevelsTrips = 0; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: TotImmediateFlipBytes = %d\n", __func__, TotImmediateFlipBytes); + dml_print("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip); +#endif + + if (TotImmediateFlipBytes > 0) { + if (use_one_row_for_frame_flip) { + ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + 2 * DPTEBytesPerRow) * + BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes; + } else { + ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * + BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes; + } + if (GPUVMEnable == true) { + TimeForFetchingMetaPTEImmediateFlip = dml_max3(Tno_bw + PDEAndMetaPTEBytesPerFrame * + HostVMInefficiencyFactor / ImmediateFlipBW, + UrgentExtraLatency + UrgentLatency * + (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1), + LineTime / 4.0); + } else { + TimeForFetchingMetaPTEImmediateFlip = 0; + } + if ((GPUVMEnable == true || DCCEnable == true)) { + TimeForFetchingRowInVBlankImmediateFlip = dml_max3( + (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW, + UrgentLatency * (HostVMDynamicLevelsTrips + 1), LineTime / 4.0); + } else { + TimeForFetchingRowInVBlankImmediateFlip = 0; + } + + *DestinationLinesToRequestVMInImmediateFlip = + dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1.0) / 4.0; + *DestinationLinesToRequestRowInImmediateFlip = + dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1.0) / 4.0; + + if (GPUVMEnable == true) { + *final_flip_bw = dml_max(PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / + (*DestinationLinesToRequestVMInImmediateFlip * LineTime), + (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / + (*DestinationLinesToRequestRowInImmediateFlip * LineTime)); + } else if ((GPUVMEnable == true || DCCEnable == true)) { + *final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / + (*DestinationLinesToRequestRowInImmediateFlip * LineTime); + } else { + *final_flip_bw = 0; + } + } else { + TimeForFetchingMetaPTEImmediateFlip = 0; + TimeForFetchingRowInVBlankImmediateFlip = 0; + *DestinationLinesToRequestVMInImmediateFlip = 0; + *DestinationLinesToRequestRowInImmediateFlip = 0; + *final_flip_bw = 0; + } + + if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) { + if (GPUVMEnable == true && DCCEnable != true) { + min_row_time = dml_min(dpte_row_height * + LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma); + } else if (GPUVMEnable != true && DCCEnable == true) { + min_row_time = dml_min(meta_row_height * + LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma); + } else { + min_row_time = dml_min4(dpte_row_height * LineTime / VRatio, meta_row_height * + LineTime / VRatio, dpte_row_height_chroma * LineTime / + VRatioChroma, meta_row_height_chroma * LineTime / VRatioChroma); + } + } else { + if (GPUVMEnable == true && DCCEnable != true) { + min_row_time = dpte_row_height * LineTime / VRatio; + } else if (GPUVMEnable != true && DCCEnable == true) { + min_row_time = meta_row_height * LineTime / VRatio; + } else { + min_row_time = + dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio); + } + } + + if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16 + || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip + > min_row_time) { + *ImmediateFlipSupportedForPipe = false; + } else { + *ImmediateFlipSupportedForPipe = true; + } + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable); + dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable); + dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", + __func__, *DestinationLinesToRequestVMInImmediateFlip); + dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", + __func__, *DestinationLinesToRequestRowInImmediateFlip); + dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip); + dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", + __func__, TimeForFetchingRowInVBlankImmediateFlip); + dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time); + dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, *ImmediateFlipSupportedForPipe); +#endif +} // CalculateFlipSchedule + +void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( + struct vba_vars_st *v, + unsigned int PrefetchMode, + double DCFCLK, + double ReturnBW, + SOCParametersList mmSOCParameters, + double SOCCLK, + double DCFClkDeepSleep, + unsigned int DETBufferSizeY[], + unsigned int DETBufferSizeC[], + unsigned int SwathHeightY[], + unsigned int SwathHeightC[], + double SwathWidthY[], + double SwathWidthC[], + unsigned int DPPPerSurface[], + double BytePerPixelDETY[], + double BytePerPixelDETC[], + double DSTXAfterScaler[], + double DSTYAfterScaler[], + bool UnboundedRequestEnabled, + unsigned int CompressedBufferSizeInkByte, + + /* Output */ + enum clock_change_support *DRAMClockChangeSupport, + double MaxActiveDRAMClockChangeLatencySupported[], + unsigned int SubViewportLinesNeededInMALL[], + enum dm_fclock_change_support *FCLKChangeSupport, + double *MinActiveFCLKChangeLatencySupported, + bool *USRRetrainingSupport, + double ActiveDRAMClockChangeLatencyMargin[]) +{ + unsigned int i, j, k; + unsigned int SurfaceWithMinActiveFCLKChangeMargin = 0; + unsigned int DRAMClockChangeSupportNumber = 0; + unsigned int LastSurfaceWithoutMargin; + unsigned int DRAMClockChangeMethod = 0; + bool FoundFirstSurfaceWithMinActiveFCLKChangeMargin = false; + double MinActiveFCLKChangeMargin = 0.; + double SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = 0.; + double ActiveClockChangeLatencyHidingY; + double ActiveClockChangeLatencyHidingC; + double ActiveClockChangeLatencyHiding; + double EffectiveDETBufferSizeY; + double ActiveFCLKChangeLatencyMargin[DC__NUM_DPP__MAX]; + double USRRetrainingLatencyMargin[DC__NUM_DPP__MAX]; + double TotalPixelBW = 0.0; + bool SynchronizedSurfaces[DC__NUM_DPP__MAX][DC__NUM_DPP__MAX]; + double EffectiveLBLatencyHidingY; + double EffectiveLBLatencyHidingC; + double LinesInDETY[DC__NUM_DPP__MAX]; + double LinesInDETC[DC__NUM_DPP__MAX]; + unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX]; + unsigned int LinesInDETCRoundedDownToSwath[DC__NUM_DPP__MAX]; + double FullDETBufferingTimeY; + double FullDETBufferingTimeC; + double WritebackDRAMClockChangeLatencyMargin; + double WritebackFCLKChangeLatencyMargin; + double WritebackLatencyHiding; + bool SameTimingForFCLKChange; + + unsigned int TotalActiveWriteback = 0; + unsigned int LBLatencyHidingSourceLinesY[DC__NUM_DPP__MAX]; + unsigned int LBLatencyHidingSourceLinesC[DC__NUM_DPP__MAX]; + + v->Watermark.UrgentWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency; + v->Watermark.USRRetrainingWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency + + mmSOCParameters.USRRetrainingLatency + mmSOCParameters.SMNLatency; + v->Watermark.DRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency + v->Watermark.UrgentWatermark; + v->Watermark.FCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency + v->Watermark.UrgentWatermark; + v->Watermark.StutterExitWatermark = mmSOCParameters.SRExitTime + mmSOCParameters.ExtraLatency + + 10 / DCFClkDeepSleep; + v->Watermark.StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitTime + mmSOCParameters.ExtraLatency + + 10 / DCFClkDeepSleep; + v->Watermark.Z8StutterExitWatermark = mmSOCParameters.SRExitZ8Time + mmSOCParameters.ExtraLatency + + 10 / DCFClkDeepSleep; + v->Watermark.Z8StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitZ8Time + + mmSOCParameters.ExtraLatency + 10 / DCFClkDeepSleep; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: UrgentLatency = %f\n", __func__, mmSOCParameters.UrgentLatency); + dml_print("DML::%s: ExtraLatency = %f\n", __func__, mmSOCParameters.ExtraLatency); + dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, mmSOCParameters.DRAMClockChangeLatency); + dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->Watermark.UrgentWatermark); + dml_print("DML::%s: USRRetrainingWatermark = %f\n", __func__, v->Watermark.USRRetrainingWatermark); + dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->Watermark.DRAMClockChangeWatermark); + dml_print("DML::%s: FCLKChangeWatermark = %f\n", __func__, v->Watermark.FCLKChangeWatermark); + dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, v->Watermark.StutterExitWatermark); + dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, v->Watermark.StutterEnterPlusExitWatermark); + dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, v->Watermark.Z8StutterExitWatermark); + dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", + __func__, v->Watermark.Z8StutterEnterPlusExitWatermark); +#endif + + + TotalActiveWriteback = 0; + for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { + if (v->WritebackEnable[k] == true) + TotalActiveWriteback = TotalActiveWriteback + 1; + } + + if (TotalActiveWriteback <= 1) { + v->Watermark.WritebackUrgentWatermark = mmSOCParameters.WritebackLatency; + } else { + v->Watermark.WritebackUrgentWatermark = mmSOCParameters.WritebackLatency + + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; + } + if (v->USRRetrainingRequiredFinal) + v->Watermark.WritebackUrgentWatermark = v->Watermark.WritebackUrgentWatermark + + mmSOCParameters.USRRetrainingLatency; + + if (TotalActiveWriteback <= 1) { + v->Watermark.WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency + + mmSOCParameters.WritebackLatency; + v->Watermark.WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency + + mmSOCParameters.WritebackLatency; + } else { + v->Watermark.WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency + + mmSOCParameters.WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; + v->Watermark.WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency + + mmSOCParameters.WritebackLatency + v->WritebackChunkSize * 1024 / 32 / SOCCLK; + } + + if (v->USRRetrainingRequiredFinal) + v->Watermark.WritebackDRAMClockChangeWatermark = v->Watermark.WritebackDRAMClockChangeWatermark + + mmSOCParameters.USRRetrainingLatency; + + if (v->USRRetrainingRequiredFinal) + v->Watermark.WritebackFCLKChangeWatermark = v->Watermark.WritebackFCLKChangeWatermark + + mmSOCParameters.USRRetrainingLatency; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: WritebackDRAMClockChangeWatermark = %f\n", + __func__, v->Watermark.WritebackDRAMClockChangeWatermark); + dml_print("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, v->Watermark.WritebackFCLKChangeWatermark); + dml_print("DML::%s: WritebackUrgentWatermark = %f\n", __func__, v->Watermark.WritebackUrgentWatermark); + dml_print("DML::%s: v->USRRetrainingRequiredFinal = %d\n", __func__, v->USRRetrainingRequiredFinal); + dml_print("DML::%s: USRRetrainingLatency = %f\n", __func__, mmSOCParameters.USRRetrainingLatency); +#endif + + for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { + TotalPixelBW = TotalPixelBW + DPPPerSurface[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] + + SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k]) / (v->HTotal[k] / v->PixelClock[k]); + } + + for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { + + LBLatencyHidingSourceLinesY[k] = dml_min((double) v->MaxLineBufferLines, dml_floor(v->LineBufferSizeFinal / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1); + LBLatencyHidingSourceLinesC[k] = dml_min((double) v->MaxLineBufferLines, dml_floor(v->LineBufferSizeFinal / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1); + + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%d, v->MaxLineBufferLines = %d\n", __func__, k, v->MaxLineBufferLines); + dml_print("DML::%s: k=%d, v->LineBufferSizeFinal = %d\n", __func__, k, v->LineBufferSizeFinal); + dml_print("DML::%s: k=%d, v->LBBitPerPixel = %d\n", __func__, k, v->LBBitPerPixel[k]); + dml_print("DML::%s: k=%d, v->HRatio = %f\n", __func__, k, v->HRatio[k]); + dml_print("DML::%s: k=%d, v->vtaps = %d\n", __func__, k, v->vtaps[k]); +#endif + + EffectiveLBLatencyHidingY = LBLatencyHidingSourceLinesY[k] / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]); + EffectiveLBLatencyHidingC = LBLatencyHidingSourceLinesC[k] / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]); + EffectiveDETBufferSizeY = DETBufferSizeY[k]; + + if (UnboundedRequestEnabled) { + EffectiveDETBufferSizeY = EffectiveDETBufferSizeY + + CompressedBufferSizeInkByte * 1024 + * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k]) + / (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW; + } + + LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k]; + LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]); + FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k]; + + ActiveClockChangeLatencyHidingY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY + - (DSTXAfterScaler[k] / v->HTotal[k] + DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k]; + + if (v->NumberOfActiveSurfaces > 1) { + ActiveClockChangeLatencyHidingY = ActiveClockChangeLatencyHidingY + - (1.0 - 1.0 / v->NumberOfActiveSurfaces) * SwathHeightY[k] * v->HTotal[k] + / v->PixelClock[k] / v->VRatio[k]; + } + + if (BytePerPixelDETC[k] > 0) { + LinesInDETC[k] = DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k]; + LinesInDETCRoundedDownToSwath[k] = dml_floor(LinesInDETC[k], SwathHeightC[k]); + FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) + / v->VRatioChroma[k]; + ActiveClockChangeLatencyHidingC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC + - (DSTXAfterScaler[k] / v->HTotal[k] + DSTYAfterScaler[k]) * v->HTotal[k] + / v->PixelClock[k]; + if (v->NumberOfActiveSurfaces > 1) { + ActiveClockChangeLatencyHidingC = ActiveClockChangeLatencyHidingC + - (1 - 1 / v->NumberOfActiveSurfaces) * SwathHeightC[k] * v->HTotal[k] + / v->PixelClock[k] / v->VRatioChroma[k]; + } + ActiveClockChangeLatencyHiding = dml_min(ActiveClockChangeLatencyHidingY, + ActiveClockChangeLatencyHidingC); + } else { + ActiveClockChangeLatencyHiding = ActiveClockChangeLatencyHidingY; + } + + ActiveDRAMClockChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.UrgentWatermark + - v->Watermark.DRAMClockChangeWatermark; + ActiveFCLKChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.UrgentWatermark + - v->Watermark.FCLKChangeWatermark; + USRRetrainingLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.USRRetrainingWatermark; + + if (v->WritebackEnable[k]) { + WritebackLatencyHiding = v->WritebackInterfaceBufferSize * 1024 + / (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] + / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4); + if (v->WritebackPixelFormat[k] == dm_444_64) + WritebackLatencyHiding = WritebackLatencyHiding / 2; + + WritebackDRAMClockChangeLatencyMargin = WritebackLatencyHiding + - v->Watermark.WritebackDRAMClockChangeWatermark; + + WritebackFCLKChangeLatencyMargin = WritebackLatencyHiding + - v->Watermark.WritebackFCLKChangeWatermark; + + ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMargin[k], + WritebackFCLKChangeLatencyMargin); + ActiveFCLKChangeLatencyMargin[k] = dml_min(ActiveFCLKChangeLatencyMargin[k], + WritebackDRAMClockChangeLatencyMargin); + } + MaxActiveDRAMClockChangeLatencySupported[k] = + (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ? + 0 : + (ActiveDRAMClockChangeLatencyMargin[k] + + mmSOCParameters.DRAMClockChangeLatency); + } + + for (i = 0; i < v->NumberOfActiveSurfaces; ++i) { + for (j = 0; j < v->NumberOfActiveSurfaces; ++j) { + if (i == j || + (v->BlendingAndTiming[i] == i && v->BlendingAndTiming[j] == i) || + (v->BlendingAndTiming[j] == j && v->BlendingAndTiming[i] == j) || + (v->BlendingAndTiming[i] == v->BlendingAndTiming[j] && v->BlendingAndTiming[i] != i) || + (v->SynchronizeTimingsFinal && v->PixelClock[i] == v->PixelClock[j] && + v->HTotal[i] == v->HTotal[j] && v->VTotal[i] == v->VTotal[j] && + v->VActive[i] == v->VActive[j]) || (v->SynchronizeDRRDisplaysForUCLKPStateChangeFinal && + (v->DRRDisplay[i] || v->DRRDisplay[j]))) { + SynchronizedSurfaces[i][j] = true; + } else { + SynchronizedSurfaces[i][j] = false; + } + } + } + + for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { + if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) && + (!FoundFirstSurfaceWithMinActiveFCLKChangeMargin || + ActiveFCLKChangeLatencyMargin[k] < MinActiveFCLKChangeMargin)) { + FoundFirstSurfaceWithMinActiveFCLKChangeMargin = true; + MinActiveFCLKChangeMargin = ActiveFCLKChangeLatencyMargin[k]; + SurfaceWithMinActiveFCLKChangeMargin = k; + } + } + + *MinActiveFCLKChangeLatencySupported = MinActiveFCLKChangeMargin + mmSOCParameters.FCLKChangeLatency; + + SameTimingForFCLKChange = true; + for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { + if (!SynchronizedSurfaces[k][SurfaceWithMinActiveFCLKChangeMargin]) { + if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) && + (SameTimingForFCLKChange || + ActiveFCLKChangeLatencyMargin[k] < + SecondMinActiveFCLKChangeMarginOneDisplayInVBLank)) { + SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = ActiveFCLKChangeLatencyMargin[k]; + } + SameTimingForFCLKChange = false; + } + } + + if (MinActiveFCLKChangeMargin > 0) { + *FCLKChangeSupport = dm_fclock_change_vactive; + } else if ((SameTimingForFCLKChange || SecondMinActiveFCLKChangeMarginOneDisplayInVBLank > 0) && + (PrefetchMode <= 1)) { + *FCLKChangeSupport = dm_fclock_change_vblank; + } else { + *FCLKChangeSupport = dm_fclock_change_unsupported; + } + + *USRRetrainingSupport = true; + for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { + if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) && + (USRRetrainingLatencyMargin[k] < 0)) { + *USRRetrainingSupport = false; + } + } + + for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { + if (v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_full_frame && + v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_sub_viewport && + v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe && + ActiveDRAMClockChangeLatencyMargin[k] < 0) { + if (PrefetchMode > 0) { + DRAMClockChangeSupportNumber = 2; + } else if (DRAMClockChangeSupportNumber == 0) { + DRAMClockChangeSupportNumber = 1; + LastSurfaceWithoutMargin = k; + } else if (DRAMClockChangeSupportNumber == 1 && + !SynchronizedSurfaces[LastSurfaceWithoutMargin][k]) { + DRAMClockChangeSupportNumber = 2; + } + } + } + + for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { + if (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame) + DRAMClockChangeMethod = 1; + else if (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) + DRAMClockChangeMethod = 2; + } + + if (DRAMClockChangeMethod == 0) { + if (DRAMClockChangeSupportNumber == 0) + *DRAMClockChangeSupport = dm_dram_clock_change_vactive; + else if (DRAMClockChangeSupportNumber == 1) + *DRAMClockChangeSupport = dm_dram_clock_change_vblank; + else + *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; + } else if (DRAMClockChangeMethod == 1) { + if (DRAMClockChangeSupportNumber == 0) + *DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_full_frame; + else if (DRAMClockChangeSupportNumber == 1) + *DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_full_frame; + else + *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; + } else { + if (DRAMClockChangeSupportNumber == 0) + *DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_sub_vp; + else if (DRAMClockChangeSupportNumber == 1) + *DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_sub_vp; + else + *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; + } + + for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { + unsigned int dst_y_pstate; + unsigned int src_y_pstate_l; + unsigned int src_y_pstate_c; + unsigned int src_y_ahead_l, src_y_ahead_c, sub_vp_lines_l, sub_vp_lines_c; + + dst_y_pstate = dml_ceil((mmSOCParameters.DRAMClockChangeLatency + mmSOCParameters.UrgentLatency) / (v->HTotal[k] / v->PixelClock[k]), 1); + src_y_pstate_l = dml_ceil(dst_y_pstate * v->VRatio[k], SwathHeightY[k]); + src_y_ahead_l = dml_floor(DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k], SwathHeightY[k]) + LBLatencyHidingSourceLinesY[k]; + sub_vp_lines_l = src_y_pstate_l + src_y_ahead_l + v->meta_row_height[k]; + +#ifdef __DML_VBA_DEBUG__ +dml_print("DML::%s: k=%d, DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]); +dml_print("DML::%s: k=%d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]); +dml_print("DML::%s: k=%d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]); +dml_print("DML::%s: k=%d, SwathHeightY = %d\n", __func__, k, SwathHeightY[k]); +dml_print("DML::%s: k=%d, LBLatencyHidingSourceLinesY = %d\n", __func__, k, LBLatencyHidingSourceLinesY[k]); +dml_print("DML::%s: k=%d, dst_y_pstate = %d\n", __func__, k, dst_y_pstate); +dml_print("DML::%s: k=%d, src_y_pstate_l = %d\n", __func__, k, src_y_pstate_l); +dml_print("DML::%s: k=%d, src_y_ahead_l = %d\n", __func__, k, src_y_ahead_l); +dml_print("DML::%s: k=%d, v->meta_row_height = %d\n", __func__, k, v->meta_row_height[k]); +dml_print("DML::%s: k=%d, sub_vp_lines_l = %d\n", __func__, k, sub_vp_lines_l); +#endif + SubViewportLinesNeededInMALL[k] = sub_vp_lines_l; + + if (BytePerPixelDETC[k] > 0) { + src_y_pstate_c = dml_ceil(dst_y_pstate * v->VRatioChroma[k], SwathHeightC[k]); + src_y_ahead_c = dml_floor(DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k], SwathHeightC[k]) + LBLatencyHidingSourceLinesC[k]; + sub_vp_lines_c = src_y_pstate_c + src_y_ahead_c + v->meta_row_height_chroma[k]; + SubViewportLinesNeededInMALL[k] = dml_max(sub_vp_lines_l, sub_vp_lines_c); + +#ifdef __DML_VBA_DEBUG__ +dml_print("DML::%s: k=%d, src_y_pstate_c = %d\n", __func__, k, src_y_pstate_c); +dml_print("DML::%s: k=%d, src_y_ahead_c = %d\n", __func__, k, src_y_ahead_c); +dml_print("DML::%s: k=%d, v->meta_row_height_chroma = %d\n", __func__, k, v->meta_row_height_chroma[k]); +dml_print("DML::%s: k=%d, sub_vp_lines_c = %d\n", __func__, k, sub_vp_lines_c); +#endif + } + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: DRAMClockChangeSupport = %d\n", __func__, *DRAMClockChangeSupport); + dml_print("DML::%s: FCLKChangeSupport = %d\n", __func__, *FCLKChangeSupport); + dml_print("DML::%s: MinActiveFCLKChangeLatencySupported = %f\n", + __func__, *MinActiveFCLKChangeLatencySupported); + dml_print("DML::%s: USRRetrainingSupport = %d\n", __func__, *USRRetrainingSupport); +#endif +} // CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport + +double dml32_CalculateWriteBackDISPCLK( + enum source_format_class WritebackPixelFormat, + double PixelClock, + double WritebackHRatio, + double WritebackVRatio, + unsigned int WritebackHTaps, + unsigned int WritebackVTaps, + unsigned int WritebackSourceWidth, + unsigned int WritebackDestinationWidth, + unsigned int HTotal, + unsigned int WritebackLineBufferSize, + double DISPCLKDPPCLKVCOSpeed) +{ + double DISPCLK_H, DISPCLK_V, DISPCLK_HB; + + DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio; + DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal; + DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * + WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth; + return dml32_RoundToDFSGranularity(dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB), 1, DISPCLKDPPCLKVCOSpeed); +} + +void dml32_CalculateMinAndMaxPrefetchMode( + enum dm_prefetch_modes AllowForPStateChangeOrStutterInVBlankFinal, + unsigned int *MinPrefetchMode, + unsigned int *MaxPrefetchMode) +{ + if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_none) { + *MinPrefetchMode = 3; + *MaxPrefetchMode = 3; + } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_stutter) { + *MinPrefetchMode = 2; + *MaxPrefetchMode = 2; + } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_fclk_and_stutter) { + *MinPrefetchMode = 1; + *MaxPrefetchMode = 1; + } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_uclk_fclk_and_stutter) { + *MinPrefetchMode = 0; + *MaxPrefetchMode = 0; + } else { + *MinPrefetchMode = 0; + *MaxPrefetchMode = 3; + } +} // CalculateMinAndMaxPrefetchMode + +void dml32_CalculatePixelDeliveryTimes( + unsigned int NumberOfActiveSurfaces, + double VRatio[], + double VRatioChroma[], + double VRatioPrefetchY[], + double VRatioPrefetchC[], + unsigned int swath_width_luma_ub[], + unsigned int swath_width_chroma_ub[], + unsigned int DPPPerSurface[], + double HRatio[], + double HRatioChroma[], + double PixelClock[], + double PSCL_THROUGHPUT[], + double PSCL_THROUGHPUT_CHROMA[], + double Dppclk[], + unsigned int BytePerPixelC[], + enum dm_rotation_angle SourceRotation[], + unsigned int NumberOfCursors[], + unsigned int CursorWidth[][DC__NUM_CURSOR__MAX], + unsigned int CursorBPP[][DC__NUM_CURSOR__MAX], + unsigned int BlockWidth256BytesY[], + unsigned int BlockHeight256BytesY[], + unsigned int BlockWidth256BytesC[], + unsigned int BlockHeight256BytesC[], + + /* Output */ + double DisplayPipeLineDeliveryTimeLuma[], + double DisplayPipeLineDeliveryTimeChroma[], + double DisplayPipeLineDeliveryTimeLumaPrefetch[], + double DisplayPipeLineDeliveryTimeChromaPrefetch[], + double DisplayPipeRequestDeliveryTimeLuma[], + double DisplayPipeRequestDeliveryTimeChroma[], + double DisplayPipeRequestDeliveryTimeLumaPrefetch[], + double DisplayPipeRequestDeliveryTimeChromaPrefetch[], + double CursorRequestDeliveryTime[], + double CursorRequestDeliveryTimePrefetch[]) +{ + double req_per_swath_ub; + unsigned int k; + + for (k = 0; k < NumberOfActiveSurfaces; ++k) { + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]); + dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]); + dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]); + dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]); + dml_print("DML::%s: k=%d : swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]); + dml_print("DML::%s: k=%d : swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]); + dml_print("DML::%s: k=%d : PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]); + dml_print("DML::%s: k=%d : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]); + dml_print("DML::%s: k=%d : DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]); + dml_print("DML::%s: k=%d : PixelClock = %f\n", __func__, k, PixelClock[k]); + dml_print("DML::%s: k=%d : Dppclk = %f\n", __func__, k, Dppclk[k]); +#endif + + if (VRatio[k] <= 1) { + DisplayPipeLineDeliveryTimeLuma[k] = + swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k]; + } else { + DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k]; + } + + if (BytePerPixelC[k] == 0) { + DisplayPipeLineDeliveryTimeChroma[k] = 0; + } else { + if (VRatioChroma[k] <= 1) { + DisplayPipeLineDeliveryTimeChroma[k] = + swath_width_chroma_ub[k] * DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k]; + } else { + DisplayPipeLineDeliveryTimeChroma[k] = + swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k]; + } + } + + if (VRatioPrefetchY[k] <= 1) { + DisplayPipeLineDeliveryTimeLumaPrefetch[k] = + swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k]; + } else { + DisplayPipeLineDeliveryTimeLumaPrefetch[k] = + swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k]; + } + + if (BytePerPixelC[k] == 0) { + DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0; + } else { + if (VRatioPrefetchC[k] <= 1) { + DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * + DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k]; + } else { + DisplayPipeLineDeliveryTimeChromaPrefetch[k] = + swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k]; + } + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n", + __func__, k, DisplayPipeLineDeliveryTimeLuma[k]); + dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", + __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]); + dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n", + __func__, k, DisplayPipeLineDeliveryTimeChroma[k]); + dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", + __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]); +#endif + } + + for (k = 0; k < NumberOfActiveSurfaces; ++k) { + if (!IsVertical(SourceRotation[k])) + req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k]; + else + req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k]; +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Luma)\n", __func__, k, req_per_swath_ub); +#endif + + DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub; + DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = + DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub; + if (BytePerPixelC[k] == 0) { + DisplayPipeRequestDeliveryTimeChroma[k] = 0; + DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0; + } else { + if (!IsVertical(SourceRotation[k])) + req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k]; + else + req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k]; +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Chroma)\n", __func__, k, req_per_swath_ub); +#endif + DisplayPipeRequestDeliveryTimeChroma[k] = + DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub; + DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = + DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub; + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n", + __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]); + dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", + __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]); + dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n", + __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]); + dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", + __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]); +#endif + } + + for (k = 0; k < NumberOfActiveSurfaces; ++k) { + unsigned int cursor_req_per_width; + + cursor_req_per_width = dml_ceil((double) CursorWidth[k][0] * (double) CursorBPP[k][0] / + 256.0 / 8.0, 1.0); + if (NumberOfCursors[k] > 0) { + if (VRatio[k] <= 1) { + CursorRequestDeliveryTime[k] = (double) CursorWidth[k][0] / + HRatio[k] / PixelClock[k] / cursor_req_per_width; + } else { + CursorRequestDeliveryTime[k] = (double) CursorWidth[k][0] / + PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width; + } + if (VRatioPrefetchY[k] <= 1) { + CursorRequestDeliveryTimePrefetch[k] = (double) CursorWidth[k][0] / + HRatio[k] / PixelClock[k] / cursor_req_per_width; + } else { + CursorRequestDeliveryTimePrefetch[k] = (double) CursorWidth[k][0] / + PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width; + } + } else { + CursorRequestDeliveryTime[k] = 0; + CursorRequestDeliveryTimePrefetch[k] = 0; + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%d : NumberOfCursors = %d\n", + __func__, k, NumberOfCursors[k]); + dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n", + __func__, k, CursorRequestDeliveryTime[k]); + dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n", + __func__, k, CursorRequestDeliveryTimePrefetch[k]); +#endif + } +} // CalculatePixelDeliveryTimes + +void dml32_CalculateMetaAndPTETimes( + bool use_one_row_for_frame[], + unsigned int NumberOfActiveSurfaces, + bool GPUVMEnable, + unsigned int MetaChunkSize, + unsigned int MinMetaChunkSizeBytes, + unsigned int HTotal[], + double VRatio[], + double VRatioChroma[], + double DestinationLinesToRequestRowInVBlank[], + double DestinationLinesToRequestRowInImmediateFlip[], + bool DCCEnable[], + double PixelClock[], + unsigned int BytePerPixelY[], + unsigned int BytePerPixelC[], + enum dm_rotation_angle SourceRotation[], + unsigned int dpte_row_height[], + unsigned int dpte_row_height_chroma[], + unsigned int meta_row_width[], + unsigned int meta_row_width_chroma[], + unsigned int meta_row_height[], + unsigned int meta_row_height_chroma[], + unsigned int meta_req_width[], + unsigned int meta_req_width_chroma[], + unsigned int meta_req_height[], + unsigned int meta_req_height_chroma[], + unsigned int dpte_group_bytes[], + unsigned int PTERequestSizeY[], + unsigned int PTERequestSizeC[], + unsigned int PixelPTEReqWidthY[], + unsigned int PixelPTEReqHeightY[], + unsigned int PixelPTEReqWidthC[], + unsigned int PixelPTEReqHeightC[], + unsigned int dpte_row_width_luma_ub[], + unsigned int dpte_row_width_chroma_ub[], + + /* Output */ + double DST_Y_PER_PTE_ROW_NOM_L[], + double DST_Y_PER_PTE_ROW_NOM_C[], + double DST_Y_PER_META_ROW_NOM_L[], + double DST_Y_PER_META_ROW_NOM_C[], + double TimePerMetaChunkNominal[], + double TimePerChromaMetaChunkNominal[], + double TimePerMetaChunkVBlank[], + double TimePerChromaMetaChunkVBlank[], + double TimePerMetaChunkFlip[], + double TimePerChromaMetaChunkFlip[], + double time_per_pte_group_nom_luma[], + double time_per_pte_group_vblank_luma[], + double time_per_pte_group_flip_luma[], + double time_per_pte_group_nom_chroma[], + double time_per_pte_group_vblank_chroma[], + double time_per_pte_group_flip_chroma[]) +{ + unsigned int meta_chunk_width; + unsigned int min_meta_chunk_width; + unsigned int meta_chunk_per_row_int; + unsigned int meta_row_remainder; + unsigned int meta_chunk_threshold; + unsigned int meta_chunks_per_row_ub; + unsigned int meta_chunk_width_chroma; + unsigned int min_meta_chunk_width_chroma; + unsigned int meta_chunk_per_row_int_chroma; + unsigned int meta_row_remainder_chroma; + unsigned int meta_chunk_threshold_chroma; + unsigned int meta_chunks_per_row_ub_chroma; + unsigned int dpte_group_width_luma; + unsigned int dpte_groups_per_row_luma_ub; + unsigned int dpte_group_width_chroma; + unsigned int dpte_groups_per_row_chroma_ub; + unsigned int k; + + for (k = 0; k < NumberOfActiveSurfaces; ++k) { + DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k]; + if (BytePerPixelC[k] == 0) + DST_Y_PER_PTE_ROW_NOM_C[k] = 0; + else + DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k]; + DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k]; + if (BytePerPixelC[k] == 0) + DST_Y_PER_META_ROW_NOM_C[k] = 0; + else + DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k]; + } + + for (k = 0; k < NumberOfActiveSurfaces; ++k) { + if (DCCEnable[k] == true) { + meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k]; + min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k]; + meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width; + meta_row_remainder = meta_row_width[k] % meta_chunk_width; + if (!IsVertical(SourceRotation[k])) + meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k]; + else + meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k]; + + if (meta_row_remainder <= meta_chunk_threshold) + meta_chunks_per_row_ub = meta_chunk_per_row_int + 1; + else + meta_chunks_per_row_ub = meta_chunk_per_row_int + 2; + + TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * + HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; + TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * + HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; + TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * + HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; + if (BytePerPixelC[k] == 0) { + TimePerChromaMetaChunkNominal[k] = 0; + TimePerChromaMetaChunkVBlank[k] = 0; + TimePerChromaMetaChunkFlip[k] = 0; + } else { + meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / + meta_row_height_chroma[k]; + min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / + meta_row_height_chroma[k]; + meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] / + meta_chunk_width_chroma; + meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma; + if (!IsVertical(SourceRotation[k])) { + meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - + meta_req_width_chroma[k]; + } else { + meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - + meta_req_height_chroma[k]; + } + if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) + meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1; + else + meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2; + + TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * + HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma; + TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * + HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma; + TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * + HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma; + } + } else { + TimePerMetaChunkNominal[k] = 0; + TimePerMetaChunkVBlank[k] = 0; + TimePerMetaChunkFlip[k] = 0; + TimePerChromaMetaChunkNominal[k] = 0; + TimePerChromaMetaChunkVBlank[k] = 0; + TimePerChromaMetaChunkFlip[k] = 0; + } + } + + for (k = 0; k < NumberOfActiveSurfaces; ++k) { + if (GPUVMEnable == true) { + if (!IsVertical(SourceRotation[k])) { + dpte_group_width_luma = (double) dpte_group_bytes[k] / + (double) PTERequestSizeY[k] * PixelPTEReqWidthY[k]; + } else { + dpte_group_width_luma = (double) dpte_group_bytes[k] / + (double) PTERequestSizeY[k] * PixelPTEReqHeightY[k]; + } + + if (use_one_row_for_frame[k]) { + dpte_groups_per_row_luma_ub = dml_ceil((double) dpte_row_width_luma_ub[k] / + (double) dpte_group_width_luma / 2.0, 1.0); + } else { + dpte_groups_per_row_luma_ub = dml_ceil((double) dpte_row_width_luma_ub[k] / + (double) dpte_group_width_luma, 1.0); + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%0d, use_one_row_for_frame = %d\n", + __func__, k, use_one_row_for_frame[k]); + dml_print("DML::%s: k=%0d, dpte_group_bytes = %d\n", + __func__, k, dpte_group_bytes[k]); + dml_print("DML::%s: k=%0d, PTERequestSizeY = %d\n", + __func__, k, PTERequestSizeY[k]); + dml_print("DML::%s: k=%0d, PixelPTEReqWidthY = %d\n", + __func__, k, PixelPTEReqWidthY[k]); + dml_print("DML::%s: k=%0d, PixelPTEReqHeightY = %d\n", + __func__, k, PixelPTEReqHeightY[k]); + dml_print("DML::%s: k=%0d, dpte_row_width_luma_ub = %d\n", + __func__, k, dpte_row_width_luma_ub[k]); + dml_print("DML::%s: k=%0d, dpte_group_width_luma = %d\n", + __func__, k, dpte_group_width_luma); + dml_print("DML::%s: k=%0d, dpte_groups_per_row_luma_ub = %d\n", + __func__, k, dpte_groups_per_row_luma_ub); +#endif + + time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * + HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub; + time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * + HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub; + time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * + HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub; + if (BytePerPixelC[k] == 0) { + time_per_pte_group_nom_chroma[k] = 0; + time_per_pte_group_vblank_chroma[k] = 0; + time_per_pte_group_flip_chroma[k] = 0; + } else { + if (!IsVertical(SourceRotation[k])) { + dpte_group_width_chroma = (double) dpte_group_bytes[k] / + (double) PTERequestSizeC[k] * PixelPTEReqWidthC[k]; + } else { + dpte_group_width_chroma = (double) dpte_group_bytes[k] / + (double) PTERequestSizeC[k] * PixelPTEReqHeightC[k]; + } + + if (use_one_row_for_frame[k]) { + dpte_groups_per_row_chroma_ub = dml_ceil((double) dpte_row_width_chroma_ub[k] / + (double) dpte_group_width_chroma / 2.0, 1.0); + } else { + dpte_groups_per_row_chroma_ub = dml_ceil((double) dpte_row_width_chroma_ub[k] / + (double) dpte_group_width_chroma, 1.0); + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%0d, dpte_row_width_chroma_ub = %d\n", + __func__, k, dpte_row_width_chroma_ub[k]); + dml_print("DML::%s: k=%0d, dpte_group_width_chroma = %d\n", + __func__, k, dpte_group_width_chroma); + dml_print("DML::%s: k=%0d, dpte_groups_per_row_chroma_ub = %d\n", + __func__, k, dpte_groups_per_row_chroma_ub); +#endif + time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * + HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub; + time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * + HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub; + time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * + HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub; + } + } else { + time_per_pte_group_nom_luma[k] = 0; + time_per_pte_group_vblank_luma[k] = 0; + time_per_pte_group_flip_luma[k] = 0; + time_per_pte_group_nom_chroma[k] = 0; + time_per_pte_group_vblank_chroma[k] = 0; + time_per_pte_group_flip_chroma[k] = 0; + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInVBlank = %f\n", + __func__, k, DestinationLinesToRequestRowInVBlank[k]); + dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInImmediateFlip = %f\n", + __func__, k, DestinationLinesToRequestRowInImmediateFlip[k]); + dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_L = %f\n", + __func__, k, DST_Y_PER_PTE_ROW_NOM_L[k]); + dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_C = %f\n", + __func__, k, DST_Y_PER_PTE_ROW_NOM_C[k]); + dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_L = %f\n", + __func__, k, DST_Y_PER_META_ROW_NOM_L[k]); + dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_C = %f\n", + __func__, k, DST_Y_PER_META_ROW_NOM_C[k]); + dml_print("DML::%s: k=%0d, TimePerMetaChunkNominal = %f\n", + __func__, k, TimePerMetaChunkNominal[k]); + dml_print("DML::%s: k=%0d, TimePerMetaChunkVBlank = %f\n", + __func__, k, TimePerMetaChunkVBlank[k]); + dml_print("DML::%s: k=%0d, TimePerMetaChunkFlip = %f\n", + __func__, k, TimePerMetaChunkFlip[k]); + dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkNominal = %f\n", + __func__, k, TimePerChromaMetaChunkNominal[k]); + dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkVBlank = %f\n", + __func__, k, TimePerChromaMetaChunkVBlank[k]); + dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkFlip = %f\n", + __func__, k, TimePerChromaMetaChunkFlip[k]); + dml_print("DML::%s: k=%0d, time_per_pte_group_nom_luma = %f\n", + __func__, k, time_per_pte_group_nom_luma[k]); + dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_luma = %f\n", + __func__, k, time_per_pte_group_vblank_luma[k]); + dml_print("DML::%s: k=%0d, time_per_pte_group_flip_luma = %f\n", + __func__, k, time_per_pte_group_flip_luma[k]); + dml_print("DML::%s: k=%0d, time_per_pte_group_nom_chroma = %f\n", + __func__, k, time_per_pte_group_nom_chroma[k]); + dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_chroma = %f\n", + __func__, k, time_per_pte_group_vblank_chroma[k]); + dml_print("DML::%s: k=%0d, time_per_pte_group_flip_chroma = %f\n", + __func__, k, time_per_pte_group_flip_chroma[k]); +#endif + } +} // CalculateMetaAndPTETimes + +void dml32_CalculateVMGroupAndRequestTimes( + unsigned int NumberOfActiveSurfaces, + bool GPUVMEnable, + unsigned int GPUVMMaxPageTableLevels, + unsigned int HTotal[], + unsigned int BytePerPixelC[], + double DestinationLinesToRequestVMInVBlank[], + double DestinationLinesToRequestVMInImmediateFlip[], + bool DCCEnable[], + double PixelClock[], + unsigned int dpte_row_width_luma_ub[], + unsigned int dpte_row_width_chroma_ub[], + unsigned int vm_group_bytes[], + unsigned int dpde0_bytes_per_frame_ub_l[], + unsigned int dpde0_bytes_per_frame_ub_c[], + unsigned int meta_pte_bytes_per_frame_ub_l[], + unsigned int meta_pte_bytes_per_frame_ub_c[], + + /* Output */ + double TimePerVMGroupVBlank[], + double TimePerVMGroupFlip[], + double TimePerVMRequestVBlank[], + double TimePerVMRequestFlip[]) +{ + unsigned int k; + unsigned int num_group_per_lower_vm_stage; + unsigned int num_req_per_lower_vm_stage; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces); + dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable); +#endif + for (k = 0; k < NumberOfActiveSurfaces; ++k) { + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%0d, DCCEnable = %d\n", __func__, k, DCCEnable[k]); + dml_print("DML::%s: k=%0d, vm_group_bytes = %d\n", __func__, k, vm_group_bytes[k]); + dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_l = %d\n", + __func__, k, dpde0_bytes_per_frame_ub_l[k]); + dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_c = %d\n", + __func__, k, dpde0_bytes_per_frame_ub_c[k]); + dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_l = %d\n", + __func__, k, meta_pte_bytes_per_frame_ub_l[k]); + dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_c = %d\n", + __func__, k, meta_pte_bytes_per_frame_ub_c[k]); +#endif + + if (GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1)) { + if (DCCEnable[k] == false) { + if (BytePerPixelC[k] > 0) { + num_group_per_lower_vm_stage = dml_ceil( + (double) (dpde0_bytes_per_frame_ub_l[k]) / + (double) (vm_group_bytes[k]), 1.0) + + dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / + (double) (vm_group_bytes[k]), 1.0); + } else { + num_group_per_lower_vm_stage = dml_ceil( + (double) (dpde0_bytes_per_frame_ub_l[k]) / + (double) (vm_group_bytes[k]), 1.0); + } + } else { + if (GPUVMMaxPageTableLevels == 1) { + if (BytePerPixelC[k] > 0) { + num_group_per_lower_vm_stage = dml_ceil( + (double) (meta_pte_bytes_per_frame_ub_l[k]) / + (double) (vm_group_bytes[k]), 1.0) + + dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / + (double) (vm_group_bytes[k]), 1.0); + } else { + num_group_per_lower_vm_stage = dml_ceil( + (double) (meta_pte_bytes_per_frame_ub_l[k]) / + (double) (vm_group_bytes[k]), 1.0); + } + } else { + if (BytePerPixelC[k] > 0) { + num_group_per_lower_vm_stage = 2 + dml_ceil( + (double) (dpde0_bytes_per_frame_ub_l[k]) / + (double) (vm_group_bytes[k]), 1) + + dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / + (double) (vm_group_bytes[k]), 1) + + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / + (double) (vm_group_bytes[k]), 1) + + dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / + (double) (vm_group_bytes[k]), 1); + } else { + num_group_per_lower_vm_stage = 1 + dml_ceil( + (double) (dpde0_bytes_per_frame_ub_l[k]) / + (double) (vm_group_bytes[k]), 1) + dml_ceil( + (double) (meta_pte_bytes_per_frame_ub_l[k]) / + (double) (vm_group_bytes[k]), 1); + } + } + } + + if (DCCEnable[k] == false) { + if (BytePerPixelC[k] > 0) { + num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + + dpde0_bytes_per_frame_ub_c[k] / 64; + } else { + num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64; + } + } else { + if (GPUVMMaxPageTableLevels == 1) { + if (BytePerPixelC[k] > 0) { + num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64 + + meta_pte_bytes_per_frame_ub_c[k] / 64; + } else { + num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64; + } + } else { + if (BytePerPixelC[k] > 0) { + num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / + 64 + dpde0_bytes_per_frame_ub_c[k] / 64 + + meta_pte_bytes_per_frame_ub_l[k] / 64 + + meta_pte_bytes_per_frame_ub_c[k] / 64; + } else { + num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / + 64 + meta_pte_bytes_per_frame_ub_l[k] / 64; + } + } + } + + TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * + HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage; + TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * + HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage; + TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * + HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage; + TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * + HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage; + + if (GPUVMMaxPageTableLevels > 2) { + TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2; + TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2; + TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2; + TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2; + } + + } else { + TimePerVMGroupVBlank[k] = 0; + TimePerVMGroupFlip[k] = 0; + TimePerVMRequestVBlank[k] = 0; + TimePerVMRequestFlip[k] = 0; + } + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%0d, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]); + dml_print("DML::%s: k=%0d, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]); + dml_print("DML::%s: k=%0d, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]); + dml_print("DML::%s: k=%0d, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]); +#endif + } +} // CalculateVMGroupAndRequestTimes + +void dml32_CalculateDCCConfiguration( + bool DCCEnabled, + bool DCCProgrammingAssumesScanDirectionUnknown, + enum source_format_class SourcePixelFormat, + unsigned int SurfaceWidthLuma, + unsigned int SurfaceWidthChroma, + unsigned int SurfaceHeightLuma, + unsigned int SurfaceHeightChroma, + unsigned int nomDETInKByte, + unsigned int RequestHeight256ByteLuma, + unsigned int RequestHeight256ByteChroma, + enum dm_swizzle_mode TilingFormat, + unsigned int BytePerPixelY, + unsigned int BytePerPixelC, + double BytePerPixelDETY, + double BytePerPixelDETC, + enum dm_rotation_angle SourceRotation, + /* Output */ + unsigned int *MaxUncompressedBlockLuma, + unsigned int *MaxUncompressedBlockChroma, + unsigned int *MaxCompressedBlockLuma, + unsigned int *MaxCompressedBlockChroma, + unsigned int *IndependentBlockLuma, + unsigned int *IndependentBlockChroma) +{ + typedef enum { + REQ_256Bytes, + REQ_128BytesNonContiguous, + REQ_128BytesContiguous, + REQ_NA + } RequestType; + + RequestType RequestLuma; + RequestType RequestChroma; + + unsigned int segment_order_horz_contiguous_luma; + unsigned int segment_order_horz_contiguous_chroma; + unsigned int segment_order_vert_contiguous_luma; + unsigned int segment_order_vert_contiguous_chroma; + unsigned int req128_horz_wc_l; + unsigned int req128_horz_wc_c; + unsigned int req128_vert_wc_l; + unsigned int req128_vert_wc_c; + unsigned int MAS_vp_horz_limit; + unsigned int MAS_vp_vert_limit; + unsigned int max_vp_horz_width; + unsigned int max_vp_vert_height; + unsigned int eff_surf_width_l; + unsigned int eff_surf_width_c; + unsigned int eff_surf_height_l; + unsigned int eff_surf_height_c; + unsigned int full_swath_bytes_horz_wc_l; + unsigned int full_swath_bytes_horz_wc_c; + unsigned int full_swath_bytes_vert_wc_l; + unsigned int full_swath_bytes_vert_wc_c; + unsigned int DETBufferSizeForDCC = nomDETInKByte * 1024; + + unsigned int yuv420; + unsigned int horz_div_l; + unsigned int horz_div_c; + unsigned int vert_div_l; + unsigned int vert_div_c; + + unsigned int swath_buf_size; + double detile_buf_vp_horz_limit; + double detile_buf_vp_vert_limit; + + yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || + SourcePixelFormat == dm_420_12) ? 1 : 0); + horz_div_l = 1; + horz_div_c = 1; + vert_div_l = 1; + vert_div_c = 1; + + if (BytePerPixelY == 1) + vert_div_l = 0; + if (BytePerPixelC == 1) + vert_div_c = 0; + + if (BytePerPixelC == 0) { + swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 256; + detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma * + BytePerPixelY / (1 + horz_div_l)); + detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma / + (1 + vert_div_l)); + } else { + swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 2 * 256; + detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma * + BytePerPixelY / (1 + horz_div_l) + (double) RequestHeight256ByteChroma * + BytePerPixelC / (1 + horz_div_c) / (1 + yuv420)); + detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma / + (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / + (1 + vert_div_c) / (1 + yuv420)); + } + + if (SourcePixelFormat == dm_420_10) { + detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit; + detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit; + } + + detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16); + detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16); + + MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 6144; + MAS_vp_vert_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : (BytePerPixelY == 8 ? 3072 : 6144); + max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit); + max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit); + eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma); + eff_surf_width_c = eff_surf_width_l / (1 + yuv420); + eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma); + eff_surf_height_c = eff_surf_height_l / (1 + yuv420); + + full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY; + full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma; + if (BytePerPixelC > 0) { + full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC; + full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma; + } else { + full_swath_bytes_horz_wc_c = 0; + full_swath_bytes_vert_wc_c = 0; + } + + if (SourcePixelFormat == dm_420_10) { + full_swath_bytes_horz_wc_l = dml_ceil((double) full_swath_bytes_horz_wc_l * 2.0 / 3.0, 256.0); + full_swath_bytes_horz_wc_c = dml_ceil((double) full_swath_bytes_horz_wc_c * 2.0 / 3.0, 256.0); + full_swath_bytes_vert_wc_l = dml_ceil((double) full_swath_bytes_vert_wc_l * 2.0 / 3.0, 256.0); + full_swath_bytes_vert_wc_c = dml_ceil((double) full_swath_bytes_vert_wc_c * 2.0 / 3.0, 256.0); + } + + if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) { + req128_horz_wc_l = 0; + req128_horz_wc_c = 0; + } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + + full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) { + req128_horz_wc_l = 0; + req128_horz_wc_c = 1; + } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * + full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) { + req128_horz_wc_l = 1; + req128_horz_wc_c = 0; + } else { + req128_horz_wc_l = 1; + req128_horz_wc_c = 1; + } + + if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) { + req128_vert_wc_l = 0; + req128_vert_wc_c = 0; + } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * + full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) { + req128_vert_wc_l = 0; + req128_vert_wc_c = 1; + } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && + full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) { + req128_vert_wc_l = 1; + req128_vert_wc_c = 0; + } else { + req128_vert_wc_l = 1; + req128_vert_wc_c = 1; + } + + if (BytePerPixelY == 2) { + segment_order_horz_contiguous_luma = 0; + segment_order_vert_contiguous_luma = 1; + } else { + segment_order_horz_contiguous_luma = 1; + segment_order_vert_contiguous_luma = 0; + } + + if (BytePerPixelC == 2) { + segment_order_horz_contiguous_chroma = 0; + segment_order_vert_contiguous_chroma = 1; + } else { + segment_order_horz_contiguous_chroma = 1; + segment_order_vert_contiguous_chroma = 0; + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: DCCEnabled = %d\n", __func__, DCCEnabled); + dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte); + dml_print("DML::%s: DETBufferSizeForDCC = %d\n", __func__, DETBufferSizeForDCC); + dml_print("DML::%s: req128_horz_wc_l = %d\n", __func__, req128_horz_wc_l); + dml_print("DML::%s: req128_horz_wc_c = %d\n", __func__, req128_horz_wc_c); + dml_print("DML::%s: full_swath_bytes_horz_wc_l = %d\n", __func__, full_swath_bytes_horz_wc_l); + dml_print("DML::%s: full_swath_bytes_vert_wc_c = %d\n", __func__, full_swath_bytes_vert_wc_c); + dml_print("DML::%s: segment_order_horz_contiguous_luma = %d\n", __func__, segment_order_horz_contiguous_luma); + dml_print("DML::%s: segment_order_horz_contiguous_chroma = %d\n", + __func__, segment_order_horz_contiguous_chroma); +#endif + + if (DCCProgrammingAssumesScanDirectionUnknown == true) { + if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) + RequestLuma = REQ_256Bytes; + else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || + (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) + RequestLuma = REQ_128BytesNonContiguous; + else + RequestLuma = REQ_128BytesContiguous; + + if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) + RequestChroma = REQ_256Bytes; + else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || + (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) + RequestChroma = REQ_128BytesNonContiguous; + else + RequestChroma = REQ_128BytesContiguous; + + } else if (!IsVertical(SourceRotation)) { + if (req128_horz_wc_l == 0) + RequestLuma = REQ_256Bytes; + else if (segment_order_horz_contiguous_luma == 0) + RequestLuma = REQ_128BytesNonContiguous; + else + RequestLuma = REQ_128BytesContiguous; + + if (req128_horz_wc_c == 0) + RequestChroma = REQ_256Bytes; + else if (segment_order_horz_contiguous_chroma == 0) + RequestChroma = REQ_128BytesNonContiguous; + else + RequestChroma = REQ_128BytesContiguous; + + } else { + if (req128_vert_wc_l == 0) + RequestLuma = REQ_256Bytes; + else if (segment_order_vert_contiguous_luma == 0) + RequestLuma = REQ_128BytesNonContiguous; + else + RequestLuma = REQ_128BytesContiguous; + + if (req128_vert_wc_c == 0) + RequestChroma = REQ_256Bytes; + else if (segment_order_vert_contiguous_chroma == 0) + RequestChroma = REQ_128BytesNonContiguous; + else + RequestChroma = REQ_128BytesContiguous; + } + + if (RequestLuma == REQ_256Bytes) { + *MaxUncompressedBlockLuma = 256; + *MaxCompressedBlockLuma = 256; + *IndependentBlockLuma = 0; + } else if (RequestLuma == REQ_128BytesContiguous) { + *MaxUncompressedBlockLuma = 256; + *MaxCompressedBlockLuma = 128; + *IndependentBlockLuma = 128; + } else { + *MaxUncompressedBlockLuma = 256; + *MaxCompressedBlockLuma = 64; + *IndependentBlockLuma = 64; + } + + if (RequestChroma == REQ_256Bytes) { + *MaxUncompressedBlockChroma = 256; + *MaxCompressedBlockChroma = 256; + *IndependentBlockChroma = 0; + } else if (RequestChroma == REQ_128BytesContiguous) { + *MaxUncompressedBlockChroma = 256; + *MaxCompressedBlockChroma = 128; + *IndependentBlockChroma = 128; + } else { + *MaxUncompressedBlockChroma = 256; + *MaxCompressedBlockChroma = 64; + *IndependentBlockChroma = 64; + } + + if (DCCEnabled != true || BytePerPixelC == 0) { + *MaxUncompressedBlockChroma = 0; + *MaxCompressedBlockChroma = 0; + *IndependentBlockChroma = 0; + } + + if (DCCEnabled != true) { + *MaxUncompressedBlockLuma = 0; + *MaxCompressedBlockLuma = 0; + *IndependentBlockLuma = 0; + } + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: MaxUncompressedBlockLuma = %d\n", __func__, *MaxUncompressedBlockLuma); + dml_print("DML::%s: MaxCompressedBlockLuma = %d\n", __func__, *MaxCompressedBlockLuma); + dml_print("DML::%s: IndependentBlockLuma = %d\n", __func__, *IndependentBlockLuma); + dml_print("DML::%s: MaxUncompressedBlockChroma = %d\n", __func__, *MaxUncompressedBlockChroma); + dml_print("DML::%s: MaxCompressedBlockChroma = %d\n", __func__, *MaxCompressedBlockChroma); + dml_print("DML::%s: IndependentBlockChroma = %d\n", __func__, *IndependentBlockChroma); +#endif + +} // CalculateDCCConfiguration + +void dml32_CalculateStutterEfficiency( + unsigned int CompressedBufferSizeInkByte, + enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], + bool UnboundedRequestEnabled, + unsigned int MetaFIFOSizeInKEntries, + unsigned int ZeroSizeBufferEntries, + unsigned int PixelChunkSizeInKByte, + unsigned int NumberOfActiveSurfaces, + unsigned int ROBBufferSizeInKByte, + double TotalDataReadBandwidth, + double DCFCLK, + double ReturnBW, + unsigned int CompbufReservedSpace64B, + unsigned int CompbufReservedSpaceZs, + double SRExitTime, + double SRExitZ8Time, + bool SynchronizeTimingsFinal, + unsigned int BlendingAndTiming[], + double StutterEnterPlusExitWatermark, + double Z8StutterEnterPlusExitWatermark, + bool ProgressiveToInterlaceUnitInOPP, + bool Interlace[], + double MinTTUVBlank[], + unsigned int DPPPerSurface[], + unsigned int DETBufferSizeY[], + unsigned int BytePerPixelY[], + double BytePerPixelDETY[], + double SwathWidthY[], + unsigned int SwathHeightY[], + unsigned int SwathHeightC[], + double NetDCCRateLuma[], + double NetDCCRateChroma[], + double DCCFractionOfZeroSizeRequestsLuma[], + double DCCFractionOfZeroSizeRequestsChroma[], + unsigned int HTotal[], + unsigned int VTotal[], + double PixelClock[], + double VRatio[], + enum dm_rotation_angle SourceRotation[], + unsigned int BlockHeight256BytesY[], + unsigned int BlockWidth256BytesY[], + unsigned int BlockHeight256BytesC[], + unsigned int BlockWidth256BytesC[], + unsigned int DCCYMaxUncompressedBlock[], + unsigned int DCCCMaxUncompressedBlock[], + unsigned int VActive[], + bool DCCEnable[], + bool WritebackEnable[], + double ReadBandwidthSurfaceLuma[], + double ReadBandwidthSurfaceChroma[], + double meta_row_bw[], + double dpte_row_bw[], + + /* Output */ + double *StutterEfficiencyNotIncludingVBlank, + double *StutterEfficiency, + unsigned int *NumberOfStutterBurstsPerFrame, + double *Z8StutterEfficiencyNotIncludingVBlank, + double *Z8StutterEfficiency, + unsigned int *Z8NumberOfStutterBurstsPerFrame, + double *StutterPeriod, + bool *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE) +{ + + bool FoundCriticalSurface = false; + unsigned int SwathSizeCriticalSurface = 0; + unsigned int LastChunkOfSwathSize; + unsigned int MissingPartOfLastSwathOfDETSize; + double LastZ8StutterPeriod = 0.0; + double LastStutterPeriod = 0.0; + unsigned int TotalNumberOfActiveOTG = 0; + double doublePixelClock; + unsigned int doubleHTotal; + unsigned int doubleVTotal; + bool SameTiming = true; + double DETBufferingTimeY; + double SwathWidthYCriticalSurface = 0.0; + double SwathHeightYCriticalSurface = 0.0; + double VActiveTimeCriticalSurface = 0.0; + double FrameTimeCriticalSurface = 0.0; + unsigned int BytePerPixelYCriticalSurface = 0; + double LinesToFinishSwathTransferStutterCriticalSurface = 0.0; + unsigned int DETBufferSizeYCriticalSurface = 0; + double MinTTUVBlankCriticalSurface = 0.0; + unsigned int BlockWidth256BytesYCriticalSurface = 0; + bool doublePlaneCriticalSurface = 0; + bool doublePipeCriticalSurface = 0; + double TotalCompressedReadBandwidth; + double TotalRowReadBandwidth; + double AverageDCCCompressionRate; + double EffectiveCompressedBufferSize; + double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer; + double StutterBurstTime; + unsigned int TotalActiveWriteback; + double LinesInDETY; + double LinesInDETYRoundedDownToSwath; + double MaximumEffectiveCompressionLuma; + double MaximumEffectiveCompressionChroma; + double TotalZeroSizeRequestReadBandwidth; + double TotalZeroSizeCompressedReadBandwidth; + double AverageDCCZeroSizeFraction; + double AverageZeroSizeCompressionRate; + unsigned int k; + + TotalZeroSizeRequestReadBandwidth = 0; + TotalZeroSizeCompressedReadBandwidth = 0; + TotalRowReadBandwidth = 0; + TotalCompressedReadBandwidth = 0; + + for (k = 0; k < NumberOfActiveSurfaces; ++k) { + if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) { + if (DCCEnable[k] == true) { + if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesY[k] > SwathHeightY[k]) + || (!IsVertical(SourceRotation[k]) + && BlockHeight256BytesY[k] > SwathHeightY[k]) + || DCCYMaxUncompressedBlock[k] < 256) { + MaximumEffectiveCompressionLuma = 2; + } else { + MaximumEffectiveCompressionLuma = 4; + } + TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + + ReadBandwidthSurfaceLuma[k] + / dml_min(NetDCCRateLuma[k], + MaximumEffectiveCompressionLuma); +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n", + __func__, k, ReadBandwidthSurfaceLuma[k]); + dml_print("DML::%s: k=%0d, NetDCCRateLuma = %f\n", + __func__, k, NetDCCRateLuma[k]); + dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionLuma = %f\n", + __func__, k, MaximumEffectiveCompressionLuma); +#endif + TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + + ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k]; + TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth + + ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k] + / MaximumEffectiveCompressionLuma; + + if (ReadBandwidthSurfaceChroma[k] > 0) { + if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesC[k] > SwathHeightC[k]) + || (!IsVertical(SourceRotation[k]) + && BlockHeight256BytesC[k] > SwathHeightC[k]) + || DCCCMaxUncompressedBlock[k] < 256) { + MaximumEffectiveCompressionChroma = 2; + } else { + MaximumEffectiveCompressionChroma = 4; + } + TotalCompressedReadBandwidth = + TotalCompressedReadBandwidth + + ReadBandwidthSurfaceChroma[k] + / dml_min(NetDCCRateChroma[k], + MaximumEffectiveCompressionChroma); +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceChroma = %f\n", + __func__, k, ReadBandwidthSurfaceChroma[k]); + dml_print("DML::%s: k=%0d, NetDCCRateChroma = %f\n", + __func__, k, NetDCCRateChroma[k]); + dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionChroma = %f\n", + __func__, k, MaximumEffectiveCompressionChroma); +#endif + TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + + ReadBandwidthSurfaceChroma[k] + * DCCFractionOfZeroSizeRequestsChroma[k]; + TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth + + ReadBandwidthSurfaceChroma[k] + * DCCFractionOfZeroSizeRequestsChroma[k] + / MaximumEffectiveCompressionChroma; + } + } else { + TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + + ReadBandwidthSurfaceLuma[k] + ReadBandwidthSurfaceChroma[k]; + } + TotalRowReadBandwidth = TotalRowReadBandwidth + + DPPPerSurface[k] * (meta_row_bw[k] + dpte_row_bw[k]); + } + } + + AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth; + AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled); + dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth); + dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth); + dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", + __func__, TotalZeroSizeCompressedReadBandwidth); + dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma); + dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma); + dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate); + dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction); + dml_print("DML::%s: CompbufReservedSpace64B = %d\n", __func__, CompbufReservedSpace64B); + dml_print("DML::%s: CompbufReservedSpaceZs = %d\n", __func__, CompbufReservedSpaceZs); + dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte); +#endif + if (AverageDCCZeroSizeFraction == 1) { + AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth + / TotalZeroSizeCompressedReadBandwidth; + EffectiveCompressedBufferSize = (double) MetaFIFOSizeInKEntries * 1024 * 64 + * AverageZeroSizeCompressionRate + + ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64 + * AverageZeroSizeCompressionRate; + } else if (AverageDCCZeroSizeFraction > 0) { + AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth + / TotalZeroSizeCompressedReadBandwidth; + EffectiveCompressedBufferSize = dml_min( + (double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate, + (double) MetaFIFOSizeInKEntries * 1024 * 64 + / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + + 1 / AverageDCCCompressionRate)) + + dml_min(((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64) + * AverageDCCCompressionRate, + ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64 + / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate)); + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: min 1 = %f\n", __func__, + CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate); + dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 / + (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / + AverageDCCCompressionRate)); + dml_print("DML::%s: min 3 = %f\n", __func__, (ROBBufferSizeInKByte * 1024 - + CompbufReservedSpace64B * 64) * AverageDCCCompressionRate); + dml_print("DML::%s: min 4 = %f\n", __func__, (ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64 / + (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate)); +#endif + } else { + EffectiveCompressedBufferSize = dml_min( + (double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate, + (double) MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate) + + ((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64) + * AverageDCCCompressionRate; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: min 1 = %f\n", __func__, + CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate); + dml_print("DML::%s: min 2 = %f\n", __func__, + MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate); +#endif + } + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries); + dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate); + dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize); +#endif + + *StutterPeriod = 0; + + for (k = 0; k < NumberOfActiveSurfaces; ++k) { + if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) { + LinesInDETY = ((double) DETBufferSizeY[k] + + (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0) + * ReadBandwidthSurfaceLuma[k] / TotalDataReadBandwidth) + / BytePerPixelDETY[k] / SwathWidthY[k]; + LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]); + DETBufferingTimeY = LinesInDETYRoundedDownToSwath * ((double) HTotal[k] / PixelClock[k]) + / VRatio[k]; +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%0d, DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]); + dml_print("DML::%s: k=%0d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]); + dml_print("DML::%s: k=%0d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]); + dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n", + __func__, k, ReadBandwidthSurfaceLuma[k]); + dml_print("DML::%s: k=%0d, TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth); + dml_print("DML::%s: k=%0d, LinesInDETY = %f\n", __func__, k, LinesInDETY); + dml_print("DML::%s: k=%0d, LinesInDETYRoundedDownToSwath = %f\n", + __func__, k, LinesInDETYRoundedDownToSwath); + dml_print("DML::%s: k=%0d, HTotal = %d\n", __func__, k, HTotal[k]); + dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]); + dml_print("DML::%s: k=%0d, VRatio = %f\n", __func__, k, VRatio[k]); + dml_print("DML::%s: k=%0d, DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY); + dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]); +#endif + + if (!FoundCriticalSurface || DETBufferingTimeY < *StutterPeriod) { + bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP; + + FoundCriticalSurface = true; + *StutterPeriod = DETBufferingTimeY; + FrameTimeCriticalSurface = ( + isInterlaceTiming ? + dml_floor((double) VTotal[k] / 2.0, 1.0) : VTotal[k]) + * (double) HTotal[k] / PixelClock[k]; + VActiveTimeCriticalSurface = ( + isInterlaceTiming ? + dml_floor((double) VActive[k] / 2.0, 1.0) : VActive[k]) + * (double) HTotal[k] / PixelClock[k]; + BytePerPixelYCriticalSurface = BytePerPixelY[k]; + SwathWidthYCriticalSurface = SwathWidthY[k]; + SwathHeightYCriticalSurface = SwathHeightY[k]; + BlockWidth256BytesYCriticalSurface = BlockWidth256BytesY[k]; + LinesToFinishSwathTransferStutterCriticalSurface = SwathHeightY[k] + - (LinesInDETY - LinesInDETYRoundedDownToSwath); + DETBufferSizeYCriticalSurface = DETBufferSizeY[k]; + MinTTUVBlankCriticalSurface = MinTTUVBlank[k]; + doublePlaneCriticalSurface = (ReadBandwidthSurfaceChroma[k] == 0); + doublePipeCriticalSurface = (DPPPerSurface[k] == 1); + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%0d, FoundCriticalSurface = %d\n", + __func__, k, FoundCriticalSurface); + dml_print("DML::%s: k=%0d, StutterPeriod = %f\n", + __func__, k, *StutterPeriod); + dml_print("DML::%s: k=%0d, MinTTUVBlankCriticalSurface = %f\n", + __func__, k, MinTTUVBlankCriticalSurface); + dml_print("DML::%s: k=%0d, FrameTimeCriticalSurface = %f\n", + __func__, k, FrameTimeCriticalSurface); + dml_print("DML::%s: k=%0d, VActiveTimeCriticalSurface = %f\n", + __func__, k, VActiveTimeCriticalSurface); + dml_print("DML::%s: k=%0d, BytePerPixelYCriticalSurface = %d\n", + __func__, k, BytePerPixelYCriticalSurface); + dml_print("DML::%s: k=%0d, SwathWidthYCriticalSurface = %f\n", + __func__, k, SwathWidthYCriticalSurface); + dml_print("DML::%s: k=%0d, SwathHeightYCriticalSurface = %f\n", + __func__, k, SwathHeightYCriticalSurface); + dml_print("DML::%s: k=%0d, BlockWidth256BytesYCriticalSurface = %d\n", + __func__, k, BlockWidth256BytesYCriticalSurface); + dml_print("DML::%s: k=%0d, doublePlaneCriticalSurface = %d\n", + __func__, k, doublePlaneCriticalSurface); + dml_print("DML::%s: k=%0d, doublePipeCriticalSurface = %d\n", + __func__, k, doublePipeCriticalSurface); + dml_print("DML::%s: k=%0d, LinesToFinishSwathTransferStutterCriticalSurface = %f\n", + __func__, k, LinesToFinishSwathTransferStutterCriticalSurface); +#endif + } + } + } + + PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth, + EffectiveCompressedBufferSize); +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte); + dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate); + dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", + __func__, *StutterPeriod * TotalDataReadBandwidth); + dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize); + dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__, + PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer); + dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW); + dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth); + dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth); + dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK); +#endif + + StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate + / ReturnBW + + (*StutterPeriod * TotalDataReadBandwidth + - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64) + + *StutterPeriod * TotalRowReadBandwidth / ReturnBW; +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / + AverageDCCCompressionRate / ReturnBW); + dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", + __func__, (*StutterPeriod * TotalDataReadBandwidth)); + dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth - + PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)); + dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW); + dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime); +#endif + StutterBurstTime = dml_max(StutterBurstTime, + LinesToFinishSwathTransferStutterCriticalSurface * BytePerPixelYCriticalSurface + * SwathWidthYCriticalSurface / ReturnBW); + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: Time to finish residue swath=%f\n", + __func__, + LinesToFinishSwathTransferStutterCriticalSurface * + BytePerPixelYCriticalSurface * SwathWidthYCriticalSurface / ReturnBW); +#endif + + TotalActiveWriteback = 0; + for (k = 0; k < NumberOfActiveSurfaces; ++k) { + if (WritebackEnable[k]) + TotalActiveWriteback = TotalActiveWriteback + 1; + } + + if (TotalActiveWriteback == 0) { +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime); + dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time); + dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime); + dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod); +#endif + *StutterEfficiencyNotIncludingVBlank = dml_max(0., + 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100; + *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0., + 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100; + *NumberOfStutterBurstsPerFrame = ( + *StutterEfficiencyNotIncludingVBlank > 0 ? + dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0); + *Z8NumberOfStutterBurstsPerFrame = ( + *Z8StutterEfficiencyNotIncludingVBlank > 0 ? + dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0); + } else { + *StutterEfficiencyNotIncludingVBlank = 0.; + *Z8StutterEfficiencyNotIncludingVBlank = 0.; + *NumberOfStutterBurstsPerFrame = 0; + *Z8NumberOfStutterBurstsPerFrame = 0; + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, VActiveTimeCriticalSurface); + dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", + __func__, *StutterEfficiencyNotIncludingVBlank); + dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", + __func__, *Z8StutterEfficiencyNotIncludingVBlank); + dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame); + dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame); +#endif + + for (k = 0; k < NumberOfActiveSurfaces; ++k) { + if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) { + if (BlendingAndTiming[k] == k) { + if (TotalNumberOfActiveOTG == 0) { + doublePixelClock = PixelClock[k]; + doubleHTotal = HTotal[k]; + doubleVTotal = VTotal[k]; + } else if (doublePixelClock != PixelClock[k] || doubleHTotal != HTotal[k] + || doubleVTotal != VTotal[k]) { + SameTiming = false; + } + TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1; + } + } + } + + if (*StutterEfficiencyNotIncludingVBlank > 0) { + LastStutterPeriod = VActiveTimeCriticalSurface - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod; + + if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming + && LastStutterPeriod + MinTTUVBlankCriticalSurface > StutterEnterPlusExitWatermark) { + *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime + + StutterBurstTime * VActiveTimeCriticalSurface + / *StutterPeriod) / FrameTimeCriticalSurface) * 100; + } else { + *StutterEfficiency = *StutterEfficiencyNotIncludingVBlank; + } + } else { + *StutterEfficiency = 0; + } + + if (*Z8StutterEfficiencyNotIncludingVBlank > 0) { + LastZ8StutterPeriod = VActiveTimeCriticalSurface + - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod; + if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming && LastZ8StutterPeriod + + MinTTUVBlankCriticalSurface > Z8StutterEnterPlusExitWatermark) { + *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime + * VActiveTimeCriticalSurface / *StutterPeriod) / FrameTimeCriticalSurface) * 100; + } else { + *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank; + } + } else { + *Z8StutterEfficiency = 0.; + } + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod); + dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark); + dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime); + dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod); + dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency); + dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency); + dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", + __func__, *StutterEfficiencyNotIncludingVBlank); + dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame); +#endif + + SwathSizeCriticalSurface = BytePerPixelYCriticalSurface * SwathHeightYCriticalSurface + * dml_ceil(SwathWidthYCriticalSurface, BlockWidth256BytesYCriticalSurface); + LastChunkOfSwathSize = SwathSizeCriticalSurface % (PixelChunkSizeInKByte * 1024); + MissingPartOfLastSwathOfDETSize = dml_ceil(DETBufferSizeYCriticalSurface, SwathSizeCriticalSurface) + - DETBufferSizeYCriticalSurface; + + *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!UnboundedRequestEnabled && (NumberOfActiveSurfaces == 1) + && doublePlaneCriticalSurface && doublePipeCriticalSurface && (LastChunkOfSwathSize > 0) + && (LastChunkOfSwathSize <= 4096) && (MissingPartOfLastSwathOfDETSize > 0) + && (MissingPartOfLastSwathOfDETSize <= LastChunkOfSwathSize)); + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: SwathSizeCriticalSurface = %d\n", __func__, SwathSizeCriticalSurface); + dml_print("DML::%s: LastChunkOfSwathSize = %d\n", __func__, LastChunkOfSwathSize); + dml_print("DML::%s: MissingPartOfLastSwathOfDETSize = %d\n", __func__, MissingPartOfLastSwathOfDETSize); + dml_print("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %d\n", __func__, *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE); +#endif +} // CalculateStutterEfficiency + +void dml32_CalculateMaxDETAndMinCompressedBufferSize( + unsigned int ConfigReturnBufferSizeInKByte, + unsigned int ROBBufferSizeInKByte, + unsigned int MaxNumDPP, + bool nomDETInKByteOverrideEnable, // VBA_DELTA, allow DV to override default DET size + unsigned int nomDETInKByteOverrideValue, // VBA_DELTA + + /* Output */ + unsigned int *MaxTotalDETInKByte, + unsigned int *nomDETInKByte, + unsigned int *MinCompressedBufferSizeInKByte) +{ + bool det_buff_size_override_en = nomDETInKByteOverrideEnable; + unsigned int det_buff_size_override_val = nomDETInKByteOverrideValue; + + *MaxTotalDETInKByte = dml_ceil(((double)ConfigReturnBufferSizeInKByte + + (double) ROBBufferSizeInKByte) * 4.0 / 5.0, 64); + *nomDETInKByte = dml_floor((double) *MaxTotalDETInKByte / (double) MaxNumDPP, 64); + *MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte - *MaxTotalDETInKByte; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %0d\n", __func__, ConfigReturnBufferSizeInKByte); + dml_print("DML::%s: ROBBufferSizeInKByte = %0d\n", __func__, ROBBufferSizeInKByte); + dml_print("DML::%s: MaxNumDPP = %0d\n", __func__, MaxNumDPP); + dml_print("DML::%s: MaxTotalDETInKByte = %0d\n", __func__, *MaxTotalDETInKByte); + dml_print("DML::%s: nomDETInKByte = %0d\n", __func__, *nomDETInKByte); + dml_print("DML::%s: MinCompressedBufferSizeInKByte = %0d\n", __func__, *MinCompressedBufferSizeInKByte); +#endif + + if (det_buff_size_override_en) { + *nomDETInKByte = det_buff_size_override_val; +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: nomDETInKByte = %0d (override)\n", __func__, *nomDETInKByte); +#endif + } +} // CalculateMaxDETAndMinCompressedBufferSize + +bool dml32_CalculateVActiveBandwithSupport(unsigned int NumberOfActiveSurfaces, + double ReturnBW, + bool NotUrgentLatencyHiding[], + double ReadBandwidthLuma[], + double ReadBandwidthChroma[], + double cursor_bw[], + double meta_row_bandwidth[], + double dpte_row_bandwidth[], + unsigned int NumberOfDPP[], + double UrgentBurstFactorLuma[], + double UrgentBurstFactorChroma[], + double UrgentBurstFactorCursor[]) +{ + unsigned int k; + bool NotEnoughUrgentLatencyHiding = false; + bool CalculateVActiveBandwithSupport_val = false; + double VActiveBandwith = 0; + + for (k = 0; k < NumberOfActiveSurfaces; ++k) { + if (NotUrgentLatencyHiding[k]) { + NotEnoughUrgentLatencyHiding = true; + } + } + + for (k = 0; k < NumberOfActiveSurfaces; ++k) { + VActiveBandwith = VActiveBandwith + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k] + NumberOfDPP[k] * meta_row_bandwidth[k] + NumberOfDPP[k] * dpte_row_bandwidth[k]; + } + + CalculateVActiveBandwithSupport_val = (VActiveBandwith <= ReturnBW) && !NotEnoughUrgentLatencyHiding; + +#ifdef __DML_VBA_DEBUG__ +dml_print("DML::%s: NotEnoughUrgentLatencyHiding = %d\n", __func__, NotEnoughUrgentLatencyHiding); +dml_print("DML::%s: VActiveBandwith = %f\n", __func__, VActiveBandwith); +dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW); +dml_print("DML::%s: CalculateVActiveBandwithSupport_val = %d\n", __func__, CalculateVActiveBandwithSupport_val); +#endif + return CalculateVActiveBandwithSupport_val; +} + +void dml32_CalculatePrefetchBandwithSupport(unsigned int NumberOfActiveSurfaces, + double ReturnBW, + bool NotUrgentLatencyHiding[], + double ReadBandwidthLuma[], + double ReadBandwidthChroma[], + double PrefetchBandwidthLuma[], + double PrefetchBandwidthChroma[], + double cursor_bw[], + double meta_row_bandwidth[], + double dpte_row_bandwidth[], + double cursor_bw_pre[], + double prefetch_vmrow_bw[], + unsigned int NumberOfDPP[], + double UrgentBurstFactorLuma[], + double UrgentBurstFactorChroma[], + double UrgentBurstFactorCursor[], + double UrgentBurstFactorLumaPre[], + double UrgentBurstFactorChromaPre[], + double UrgentBurstFactorCursorPre[], + + /* output */ + double *PrefetchBandwidth, + double *FractionOfUrgentBandwidth, + bool *PrefetchBandwidthSupport) +{ + unsigned int k; + bool NotEnoughUrgentLatencyHiding = false; + for (k = 0; k < NumberOfActiveSurfaces; ++k) { + if (NotUrgentLatencyHiding[k]) { + NotEnoughUrgentLatencyHiding = true; + } + } + + *PrefetchBandwidth = 0; + for (k = 0; k < NumberOfActiveSurfaces; ++k) { + *PrefetchBandwidth = *PrefetchBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k], + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k] + NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]), + NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]); + } + + *PrefetchBandwidthSupport = (*PrefetchBandwidth <= ReturnBW) && !NotEnoughUrgentLatencyHiding; + *FractionOfUrgentBandwidth = *PrefetchBandwidth / ReturnBW; +} + +double dml32_CalculateBandwidthAvailableForImmediateFlip(unsigned int NumberOfActiveSurfaces, + double ReturnBW, + double ReadBandwidthLuma[], + double ReadBandwidthChroma[], + double PrefetchBandwidthLuma[], + double PrefetchBandwidthChroma[], + double cursor_bw[], + double cursor_bw_pre[], + unsigned int NumberOfDPP[], + double UrgentBurstFactorLuma[], + double UrgentBurstFactorChroma[], + double UrgentBurstFactorCursor[], + double UrgentBurstFactorLumaPre[], + double UrgentBurstFactorChromaPre[], + double UrgentBurstFactorCursorPre[]) +{ + unsigned int k; + double CalculateBandwidthAvailableForImmediateFlip_val = ReturnBW; + + for (k = 0; k < NumberOfActiveSurfaces; ++k) { + CalculateBandwidthAvailableForImmediateFlip_val = CalculateBandwidthAvailableForImmediateFlip_val - dml_max(ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k], + NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]); + } + + return CalculateBandwidthAvailableForImmediateFlip_val; +} + +void dml32_CalculateImmediateFlipBandwithSupport(unsigned int NumberOfActiveSurfaces, + double ReturnBW, + enum immediate_flip_requirement ImmediateFlipRequirement[], + double final_flip_bw[], + double ReadBandwidthLuma[], + double ReadBandwidthChroma[], + double PrefetchBandwidthLuma[], + double PrefetchBandwidthChroma[], + double cursor_bw[], + double meta_row_bandwidth[], + double dpte_row_bandwidth[], + double cursor_bw_pre[], + double prefetch_vmrow_bw[], + unsigned int NumberOfDPP[], + double UrgentBurstFactorLuma[], + double UrgentBurstFactorChroma[], + double UrgentBurstFactorCursor[], + double UrgentBurstFactorLumaPre[], + double UrgentBurstFactorChromaPre[], + double UrgentBurstFactorCursorPre[], + + /* output */ + double *TotalBandwidth, + double *FractionOfUrgentBandwidth, + bool *ImmediateFlipBandwidthSupport) +{ + unsigned int k; + *TotalBandwidth = 0; + for (k = 0; k < NumberOfActiveSurfaces; ++k) { + if (ImmediateFlipRequirement[k] != dm_immediate_flip_not_required) { + *TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k], + NumberOfDPP[k] * final_flip_bw[k] + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k], + NumberOfDPP[k] * (final_flip_bw[k] + PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]); + } else { + *TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k], + NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]) + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k], + NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]); + } + } + *ImmediateFlipBandwidthSupport = (*TotalBandwidth <= ReturnBW); + *FractionOfUrgentBandwidth = *TotalBandwidth / ReturnBW; +} + +bool dml32_CalculateDETSwathFillLatencyHiding(unsigned int NumberOfActiveSurfaces, + double ReturnBW, + double UrgentLatency, + unsigned int SwathHeightY[], + unsigned int SwathHeightC[], + unsigned int SwathWidthY[], + unsigned int SwathWidthC[], + double BytePerPixelInDETY[], + double BytePerPixelInDETC[], + unsigned int DETBufferSizeY[], + unsigned int DETBufferSizeC[], + unsigned int NumOfDPP[], + unsigned int HTotal[], + double PixelClock[], + double VRatioY[], + double VRatioC[], + enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[DC__NUM_DPP__MAX]) +{ + int k; + double SwathSizeAllSurfaces = 0; + double SwathSizeAllSurfacesInFetchTimeUs; + double DETSwathLatencyHidingUs; + double DETSwathLatencyHidingYUs; + double DETSwathLatencyHidingCUs; + double SwathSizePerSurfaceY[DC__NUM_DPP__MAX]; + double SwathSizePerSurfaceC[DC__NUM_DPP__MAX]; + bool NotEnoughDETSwathFillLatencyHiding = false; + + /* calculate sum of single swath size for all pipes in bytes */ + for (k = 0; k < NumberOfActiveSurfaces; k++) { + SwathSizePerSurfaceY[k] = SwathHeightY[k] * SwathWidthY[k] * BytePerPixelInDETY[k] * NumOfDPP[k]; + + if (SwathHeightC[k] != 0) + SwathSizePerSurfaceC[k] = SwathHeightC[k] * SwathWidthC[k] * BytePerPixelInDETC[k] * NumOfDPP[k]; + else + SwathSizePerSurfaceC[k] = 0; + + SwathSizeAllSurfaces += SwathSizePerSurfaceY[k] + SwathSizePerSurfaceC[k]; + } + + SwathSizeAllSurfacesInFetchTimeUs = SwathSizeAllSurfaces / ReturnBW + UrgentLatency; + + /* ensure all DET - 1 swath can hide a fetch for all surfaces */ + for (k = 0; k < NumberOfActiveSurfaces; k++) { + double LineTime = HTotal[k] / PixelClock[k]; + + /* only care if surface is not phantom */ + if (UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) { + DETSwathLatencyHidingYUs = (dml_floor(DETBufferSizeY[k] / BytePerPixelInDETY[k] / SwathWidthY[k], 1.0) - SwathHeightY[k]) / VRatioY[k] * LineTime; + + if (SwathHeightC[k] != 0) { + DETSwathLatencyHidingCUs = (dml_floor(DETBufferSizeC[k] / BytePerPixelInDETC[k] / SwathWidthC[k], 1.0) - SwathHeightC[k]) / VRatioC[k] * LineTime; + + DETSwathLatencyHidingUs = dml_min(DETSwathLatencyHidingYUs, DETSwathLatencyHidingCUs); + } else { + DETSwathLatencyHidingUs = DETSwathLatencyHidingYUs; + } + + /* DET must be able to hide time to fetch 1 swath for each surface */ + if (DETSwathLatencyHidingUs < SwathSizeAllSurfacesInFetchTimeUs) { + NotEnoughDETSwathFillLatencyHiding = true; + break; + } + } + } + + return NotEnoughDETSwathFillLatencyHiding; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h new file mode 100644 index 000000000..779c6805f --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h @@ -0,0 +1,1162 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DML_DCN32_DISPLAY_MODE_VBA_UTIL_32_H__ +#define __DML_DCN32_DISPLAY_MODE_VBA_UTIL_32_H__ + +#include "../display_mode_enums.h" +#include "os_types.h" +#include "../dc_features.h" +#include "../display_mode_structs.h" +#include "../display_mode_vba.h" + +unsigned int dml32_dscceComputeDelay( + unsigned int bpc, + double BPP, + unsigned int sliceWidth, + unsigned int numSlices, + enum output_format_class pixelFormat, + enum output_encoder_class Output); + +unsigned int dml32_dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output); + +bool IsVertical(enum dm_rotation_angle Scan); + +void dml32_CalculateBytePerPixelAndBlockSizes( + enum source_format_class SourcePixelFormat, + enum dm_swizzle_mode SurfaceTiling, + + /*Output*/ + unsigned int *BytePerPixelY, + unsigned int *BytePerPixelC, + double *BytePerPixelDETY, + double *BytePerPixelDETC, + unsigned int *BlockHeight256BytesY, + unsigned int *BlockHeight256BytesC, + unsigned int *BlockWidth256BytesY, + unsigned int *BlockWidth256BytesC, + unsigned int *MacroTileHeightY, + unsigned int *MacroTileHeightC, + unsigned int *MacroTileWidthY, + unsigned int *MacroTileWidthC); + +void dml32_CalculateSinglePipeDPPCLKAndSCLThroughput( + double HRatio, + double HRatioChroma, + double VRatio, + double VRatioChroma, + double MaxDCHUBToPSCLThroughput, + double MaxPSCLToLBThroughput, + double PixelClock, + enum source_format_class SourcePixelFormat, + unsigned int HTaps, + unsigned int HTapsChroma, + unsigned int VTaps, + unsigned int VTapsChroma, + + /* output */ + double *PSCL_THROUGHPUT, + double *PSCL_THROUGHPUT_CHROMA, + double *DPPCLKUsingSingleDPP); + +void dml32_CalculateSwathAndDETConfiguration( + unsigned int DETSizeOverride[], + enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], + unsigned int ConfigReturnBufferSizeInKByte, + unsigned int MaxTotalDETInKByte, + unsigned int MinCompressedBufferSizeInKByte, + double ForceSingleDPP, + unsigned int NumberOfActiveSurfaces, + unsigned int nomDETInKByte, + enum unbounded_requesting_policy UseUnboundedRequestingFinal, + bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment, + unsigned int PixelChunkSizeKBytes, + unsigned int ROBSizeKBytes, + unsigned int CompressedBufferSegmentSizeInkByteFinal, + enum output_encoder_class Output[], + double ReadBandwidthLuma[], + double ReadBandwidthChroma[], + double MaximumSwathWidthLuma[], + double MaximumSwathWidthChroma[], + enum dm_rotation_angle SourceRotation[], + bool ViewportStationary[], + enum source_format_class SourcePixelFormat[], + enum dm_swizzle_mode SurfaceTiling[], + unsigned int ViewportWidth[], + unsigned int ViewportHeight[], + unsigned int ViewportXStart[], + unsigned int ViewportYStart[], + unsigned int ViewportXStartC[], + unsigned int ViewportYStartC[], + unsigned int SurfaceWidthY[], + unsigned int SurfaceWidthC[], + unsigned int SurfaceHeightY[], + unsigned int SurfaceHeightC[], + unsigned int Read256BytesBlockHeightY[], + unsigned int Read256BytesBlockHeightC[], + unsigned int Read256BytesBlockWidthY[], + unsigned int Read256BytesBlockWidthC[], + enum odm_combine_mode ODMMode[], + unsigned int BlendingAndTiming[], + unsigned int BytePerPixY[], + unsigned int BytePerPixC[], + double BytePerPixDETY[], + double BytePerPixDETC[], + unsigned int HActive[], + double HRatio[], + double HRatioChroma[], + unsigned int DPPPerSurface[], + + /* Output */ + unsigned int swath_width_luma_ub[], + unsigned int swath_width_chroma_ub[], + double SwathWidth[], + double SwathWidthChroma[], + unsigned int SwathHeightY[], + unsigned int SwathHeightC[], + unsigned int DETBufferSizeInKByte[], + unsigned int DETBufferSizeY[], + unsigned int DETBufferSizeC[], + bool *UnboundedRequestEnabled, + unsigned int *CompressedBufferSizeInkByte, + unsigned int *CompBufReservedSpaceKBytes, + bool *CompBufReservedSpaceNeedAdjustment, + bool ViewportSizeSupportPerSurface[], + bool *ViewportSizeSupport); + +void dml32_CalculateSwathWidth( + bool ForceSingleDPP, + unsigned int NumberOfActiveSurfaces, + enum source_format_class SourcePixelFormat[], + enum dm_rotation_angle SourceScan[], + bool ViewportStationary[], + unsigned int ViewportWidth[], + unsigned int ViewportHeight[], + unsigned int ViewportXStart[], + unsigned int ViewportYStart[], + unsigned int ViewportXStartC[], + unsigned int ViewportYStartC[], + unsigned int SurfaceWidthY[], + unsigned int SurfaceWidthC[], + unsigned int SurfaceHeightY[], + unsigned int SurfaceHeightC[], + enum odm_combine_mode ODMMode[], + unsigned int BytePerPixY[], + unsigned int BytePerPixC[], + unsigned int Read256BytesBlockHeightY[], + unsigned int Read256BytesBlockHeightC[], + unsigned int Read256BytesBlockWidthY[], + unsigned int Read256BytesBlockWidthC[], + unsigned int BlendingAndTiming[], + unsigned int HActive[], + double HRatio[], + unsigned int DPPPerSurface[], + + /* Output */ + double SwathWidthdoubleDPPY[], + double SwathWidthdoubleDPPC[], + double SwathWidthY[], // per-pipe + double SwathWidthC[], // per-pipe + unsigned int MaximumSwathHeightY[], + unsigned int MaximumSwathHeightC[], + unsigned int swath_width_luma_ub[], // per-pipe + unsigned int swath_width_chroma_ub[]); + +bool dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, + unsigned int TotalNumberOfActiveDPP, + bool NoChroma, + enum output_encoder_class Output, + enum dm_swizzle_mode SurfaceTiling, + bool CompBufReservedSpaceNeedAdjustment, + bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment); + +void dml32_CalculateDETBufferSize( + unsigned int DETSizeOverride[], + enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], + bool ForceSingleDPP, + unsigned int NumberOfActiveSurfaces, + bool UnboundedRequestEnabled, + unsigned int nomDETInKByte, + unsigned int MaxTotalDETInKByte, + unsigned int ConfigReturnBufferSizeInKByte, + unsigned int MinCompressedBufferSizeInKByte, + unsigned int CompressedBufferSegmentSizeInkByteFinal, + enum source_format_class SourcePixelFormat[], + double ReadBandwidthLuma[], + double ReadBandwidthChroma[], + unsigned int RoundedUpMaxSwathSizeBytesY[], + unsigned int RoundedUpMaxSwathSizeBytesC[], + unsigned int DPPPerSurface[], + /* Output */ + unsigned int DETBufferSizeInKByte[], + unsigned int *CompressedBufferSizeInkByte); + +void dml32_CalculateODMMode( + unsigned int MaximumPixelsPerLinePerDSCUnit, + unsigned int HActive, + enum output_format_class OutFormat, + enum output_encoder_class Output, + enum odm_combine_policy ODMUse, + double StateDispclk, + double MaxDispclk, + bool DSCEnable, + unsigned int TotalNumberOfActiveDPP, + unsigned int MaxNumDPP, + double PixelClock, + double DISPCLKDPPCLKDSCCLKDownSpreading, + double DISPCLKRampingMargin, + double DISPCLKDPPCLKVCOSpeed, + unsigned int NumberOfDSCSlices, + + /* Output */ + bool *TotalAvailablePipesSupport, + unsigned int *NumberOfDPP, + enum odm_combine_mode *ODMMode, + double *RequiredDISPCLKPerSurface); + +double dml32_CalculateRequiredDispclk( + enum odm_combine_mode ODMMode, + double PixelClock, + double DISPCLKDPPCLKDSCCLKDownSpreading, + double DISPCLKRampingMargin, + double DISPCLKDPPCLKVCOSpeed, + double MaxDispclk); + +double dml32_RoundToDFSGranularity(double Clock, bool round_up, double VCOSpeed); + +void dml32_CalculateOutputLink( + double PHYCLKPerState, + double PHYCLKD18PerState, + double PHYCLKD32PerState, + double Downspreading, + bool IsMainSurfaceUsingTheIndicatedTiming, + enum output_encoder_class Output, + enum output_format_class OutputFormat, + unsigned int HTotal, + unsigned int HActive, + double PixelClockBackEnd, + double ForcedOutputLinkBPP, + unsigned int DSCInputBitPerComponent, + unsigned int NumberOfDSCSlices, + double AudioSampleRate, + unsigned int AudioSampleLayout, + enum odm_combine_mode ODMModeNoDSC, + enum odm_combine_mode ODMModeDSC, + bool DSCEnable, + unsigned int OutputLinkDPLanes, + enum dm_output_link_dp_rate OutputLinkDPRate, + + /* Output */ + bool *RequiresDSC, + double *RequiresFEC, + double *OutBpp, + enum dm_output_type *OutputType, + enum dm_output_rate *OutputRate, + unsigned int *RequiredSlots); + +void dml32_CalculateDPPCLK( + unsigned int NumberOfActiveSurfaces, + double DISPCLKDPPCLKDSCCLKDownSpreading, + double DISPCLKDPPCLKVCOSpeed, + double DPPCLKUsingSingleDPP[], + unsigned int DPPPerSurface[], + + /* output */ + double *GlobalDPPCLK, + double Dppclk[]); + +double dml32_TruncToValidBPP( + double LinkBitRate, + unsigned int Lanes, + unsigned int HTotal, + unsigned int HActive, + double PixelClock, + double DesiredBPP, + bool DSCEnable, + enum output_encoder_class Output, + enum output_format_class Format, + unsigned int DSCInputBitPerComponent, + unsigned int DSCSlices, + unsigned int AudioRate, + unsigned int AudioLayout, + enum odm_combine_mode ODMModeNoDSC, + enum odm_combine_mode ODMModeDSC, + /* Output */ + unsigned int *RequiredSlots); + +double dml32_RequiredDTBCLK( + bool DSCEnable, + double PixelClock, + enum output_format_class OutputFormat, + double OutputBpp, + unsigned int DSCSlices, + unsigned int HTotal, + unsigned int HActive, + unsigned int AudioRate, + unsigned int AudioLayout); + +unsigned int dml32_DSCDelayRequirement(bool DSCEnabled, + enum odm_combine_mode ODMMode, + unsigned int DSCInputBitPerComponent, + double OutputBpp, + unsigned int HActive, + unsigned int HTotal, + unsigned int NumberOfDSCSlices, + enum output_format_class OutputFormat, + enum output_encoder_class Output, + double PixelClock, + double PixelClockBackEnd, + double dsc_delay_factor_wa); + +void dml32_CalculateSurfaceSizeInMall( + unsigned int NumberOfActiveSurfaces, + unsigned int MALLAllocatedForDCN, + enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[], + bool DCCEnable[], + bool ViewportStationary[], + unsigned int ViewportXStartY[], + unsigned int ViewportYStartY[], + unsigned int ViewportXStartC[], + unsigned int ViewportYStartC[], + unsigned int ViewportWidthY[], + unsigned int ViewportHeightY[], + unsigned int BytesPerPixelY[], + unsigned int ViewportWidthC[], + unsigned int ViewportHeightC[], + unsigned int BytesPerPixelC[], + unsigned int SurfaceWidthY[], + unsigned int SurfaceWidthC[], + unsigned int SurfaceHeightY[], + unsigned int SurfaceHeightC[], + unsigned int Read256BytesBlockWidthY[], + unsigned int Read256BytesBlockWidthC[], + unsigned int Read256BytesBlockHeightY[], + unsigned int Read256BytesBlockHeightC[], + unsigned int ReadBlockWidthY[], + unsigned int ReadBlockWidthC[], + unsigned int ReadBlockHeightY[], + unsigned int ReadBlockHeightC[], + + /* Output */ + unsigned int SurfaceSizeInMALL[], + bool *ExceededMALLSize); + +void dml32_CalculateVMRowAndSwath( + unsigned int NumberOfActiveSurfaces, + DmlPipe myPipe[], + unsigned int SurfaceSizeInMALL[], + unsigned int PTEBufferSizeInRequestsLuma, + unsigned int PTEBufferSizeInRequestsChroma, + unsigned int DCCMetaBufferSizeBytes, + enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[], + enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], + unsigned int MALLAllocatedForDCN, + double SwathWidthY[], + double SwathWidthC[], + bool GPUVMEnable, + bool HostVMEnable, + unsigned int HostVMMaxNonCachedPageTableLevels, + unsigned int GPUVMMaxPageTableLevels, + unsigned int GPUVMMinPageSizeKBytes[], + unsigned int HostVMMinPageSize, + + /* Output */ + bool PTEBufferSizeNotExceeded[], + bool DCCMetaBufferSizeNotExceeded[], + unsigned int dpte_row_width_luma_ub[], + unsigned int dpte_row_width_chroma_ub[], + unsigned int dpte_row_height_luma[], + unsigned int dpte_row_height_chroma[], + unsigned int dpte_row_height_linear_luma[], // VBA_DELTA + unsigned int dpte_row_height_linear_chroma[], // VBA_DELTA + unsigned int meta_req_width[], + unsigned int meta_req_width_chroma[], + unsigned int meta_req_height[], + unsigned int meta_req_height_chroma[], + unsigned int meta_row_width[], + unsigned int meta_row_width_chroma[], + unsigned int meta_row_height[], + unsigned int meta_row_height_chroma[], + unsigned int vm_group_bytes[], + unsigned int dpte_group_bytes[], + unsigned int PixelPTEReqWidthY[], + unsigned int PixelPTEReqHeightY[], + unsigned int PTERequestSizeY[], + unsigned int PixelPTEReqWidthC[], + unsigned int PixelPTEReqHeightC[], + unsigned int PTERequestSizeC[], + unsigned int dpde0_bytes_per_frame_ub_l[], + unsigned int meta_pte_bytes_per_frame_ub_l[], + unsigned int dpde0_bytes_per_frame_ub_c[], + unsigned int meta_pte_bytes_per_frame_ub_c[], + double PrefetchSourceLinesY[], + double PrefetchSourceLinesC[], + double VInitPreFillY[], + double VInitPreFillC[], + unsigned int MaxNumSwathY[], + unsigned int MaxNumSwathC[], + double meta_row_bw[], + double dpte_row_bw[], + double PixelPTEBytesPerRow[], + double PDEAndMetaPTEBytesFrame[], + double MetaRowByte[], + bool use_one_row_for_frame[], + bool use_one_row_for_frame_flip[], + bool UsesMALLForStaticScreen[], + bool PTE_BUFFER_MODE[], + unsigned int BIGK_FRAGMENT_SIZE[]); + +unsigned int dml32_CalculateVMAndRowBytes( + bool ViewportStationary, + bool DCCEnable, + unsigned int NumberOfDPPs, + unsigned int BlockHeight256Bytes, + unsigned int BlockWidth256Bytes, + enum source_format_class SourcePixelFormat, + unsigned int SurfaceTiling, + unsigned int BytePerPixel, + enum dm_rotation_angle SourceScan, + double SwathWidth, + unsigned int ViewportHeight, + unsigned int ViewportXStart, + unsigned int ViewportYStart, + bool GPUVMEnable, + bool HostVMEnable, + unsigned int HostVMMaxNonCachedPageTableLevels, + unsigned int GPUVMMaxPageTableLevels, + unsigned int GPUVMMinPageSizeKBytes, + unsigned int HostVMMinPageSize, + unsigned int PTEBufferSizeInRequests, + unsigned int Pitch, + unsigned int DCCMetaPitch, + unsigned int MacroTileWidth, + unsigned int MacroTileHeight, + + /* Output */ + unsigned int *MetaRowByte, + unsigned int *PixelPTEBytesPerRow, + unsigned int *dpte_row_width_ub, + unsigned int *dpte_row_height, + unsigned int *dpte_row_height_linear, + unsigned int *PixelPTEBytesPerRow_one_row_per_frame, + unsigned int *dpte_row_width_ub_one_row_per_frame, + unsigned int *dpte_row_height_one_row_per_frame, + unsigned int *MetaRequestWidth, + unsigned int *MetaRequestHeight, + unsigned int *meta_row_width, + unsigned int *meta_row_height, + unsigned int *PixelPTEReqWidth, + unsigned int *PixelPTEReqHeight, + unsigned int *PTERequestSize, + unsigned int *DPDE0BytesFrame, + unsigned int *MetaPTEBytesFrame); + +double dml32_CalculatePrefetchSourceLines( + double VRatio, + unsigned int VTaps, + bool Interlace, + bool ProgressiveToInterlaceUnitInOPP, + unsigned int SwathHeight, + enum dm_rotation_angle SourceRotation, + bool ViewportStationary, + double SwathWidth, + unsigned int ViewportHeight, + unsigned int ViewportXStart, + unsigned int ViewportYStart, + + /* Output */ + double *VInitPreFill, + unsigned int *MaxNumSwath); + +void dml32_CalculateMALLUseForStaticScreen( + unsigned int NumberOfActiveSurfaces, + unsigned int MALLAllocatedForDCNFinal, + enum dm_use_mall_for_static_screen_mode *UseMALLForStaticScreen, + unsigned int SurfaceSizeInMALL[], + bool one_row_per_frame_fits_in_buffer[], + + /* output */ + bool UsesMALLForStaticScreen[]); + +void dml32_CalculateRowBandwidth( + bool GPUVMEnable, + enum source_format_class SourcePixelFormat, + double VRatio, + double VRatioChroma, + bool DCCEnable, + double LineTime, + unsigned int MetaRowByteLuma, + unsigned int MetaRowByteChroma, + unsigned int meta_row_height_luma, + unsigned int meta_row_height_chroma, + unsigned int PixelPTEBytesPerRowLuma, + unsigned int PixelPTEBytesPerRowChroma, + unsigned int dpte_row_height_luma, + unsigned int dpte_row_height_chroma, + /* Output */ + double *meta_row_bw, + double *dpte_row_bw); + +double dml32_CalculateUrgentLatency( + double UrgentLatencyPixelDataOnly, + double UrgentLatencyPixelMixedWithVMData, + double UrgentLatencyVMDataOnly, + bool DoUrgentLatencyAdjustment, + double UrgentLatencyAdjustmentFabricClockComponent, + double UrgentLatencyAdjustmentFabricClockReference, + double FabricClock); + +void dml32_CalculateUrgentBurstFactor( + enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange, + unsigned int swath_width_luma_ub, + unsigned int swath_width_chroma_ub, + unsigned int SwathHeightY, + unsigned int SwathHeightC, + double LineTime, + double UrgentLatency, + double CursorBufferSize, + unsigned int CursorWidth, + unsigned int CursorBPP, + double VRatio, + double VRatioC, + double BytePerPixelInDETY, + double BytePerPixelInDETC, + unsigned int DETBufferSizeY, + unsigned int DETBufferSizeC, + /* Output */ + double *UrgentBurstFactorCursor, + double *UrgentBurstFactorLuma, + double *UrgentBurstFactorChroma, + bool *NotEnoughUrgentLatencyHiding); + +void dml32_CalculateDCFCLKDeepSleep( + unsigned int NumberOfActiveSurfaces, + unsigned int BytePerPixelY[], + unsigned int BytePerPixelC[], + double VRatio[], + double VRatioChroma[], + double SwathWidthY[], + double SwathWidthC[], + unsigned int DPPPerSurface[], + double HRatio[], + double HRatioChroma[], + double PixelClock[], + double PSCL_THROUGHPUT[], + double PSCL_THROUGHPUT_CHROMA[], + double Dppclk[], + double ReadBandwidthLuma[], + double ReadBandwidthChroma[], + unsigned int ReturnBusWidth, + + /* Output */ + double *DCFClkDeepSleep); + +double dml32_CalculateWriteBackDelay( + enum source_format_class WritebackPixelFormat, + double WritebackHRatio, + double WritebackVRatio, + unsigned int WritebackVTaps, + unsigned int WritebackDestinationWidth, + unsigned int WritebackDestinationHeight, + unsigned int WritebackSourceHeight, + unsigned int HTotal); + +void dml32_UseMinimumDCFCLK( + enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], + bool DRRDisplay[], + bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal, + unsigned int MaxInterDCNTileRepeaters, + unsigned int MaxPrefetchMode, + double DRAMClockChangeLatencyFinal, + double FCLKChangeLatency, + double SREnterPlusExitTime, + unsigned int ReturnBusWidth, + unsigned int RoundTripPingLatencyCycles, + unsigned int ReorderingBytes, + unsigned int PixelChunkSizeInKByte, + unsigned int MetaChunkSize, + bool GPUVMEnable, + unsigned int GPUVMMaxPageTableLevels, + bool HostVMEnable, + unsigned int NumberOfActiveSurfaces, + double HostVMMinPageSize, + unsigned int HostVMMaxNonCachedPageTableLevels, + bool DynamicMetadataVMEnabled, + bool ImmediateFlipRequirement, + bool ProgressiveToInterlaceUnitInOPP, + double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation, + double PercentOfIdealSDPPortBWReceivedAfterUrgLatency, + unsigned int VTotal[], + unsigned int VActive[], + unsigned int DynamicMetadataTransmittedBytes[], + unsigned int DynamicMetadataLinesBeforeActiveRequired[], + bool Interlace[], + double RequiredDPPCLKPerSurface[][2][DC__NUM_DPP__MAX], + double RequiredDISPCLK[][2], + double UrgLatency[], + unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX], + double ProjectedDCFClkDeepSleep[][2], + double MaximumVStartup[][2][DC__NUM_DPP__MAX], + unsigned int TotalNumberOfActiveDPP[][2], + unsigned int TotalNumberOfDCCActiveDPP[][2], + unsigned int dpte_group_bytes[], + double PrefetchLinesY[][2][DC__NUM_DPP__MAX], + double PrefetchLinesC[][2][DC__NUM_DPP__MAX], + unsigned int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX], + unsigned int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX], + unsigned int BytePerPixelY[], + unsigned int BytePerPixelC[], + unsigned int HTotal[], + double PixelClock[], + double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX], + double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX], + double MetaRowBytes[][2][DC__NUM_DPP__MAX], + bool DynamicMetadataEnable[], + double ReadBandwidthLuma[], + double ReadBandwidthChroma[], + double DCFCLKPerState[], + /* Output */ + double DCFCLKState[][2]); + +unsigned int dml32_CalculateExtraLatencyBytes(unsigned int ReorderingBytes, + unsigned int TotalNumberOfActiveDPP, + unsigned int PixelChunkSizeInKByte, + unsigned int TotalNumberOfDCCActiveDPP, + unsigned int MetaChunkSize, + bool GPUVMEnable, + bool HostVMEnable, + unsigned int NumberOfActiveSurfaces, + unsigned int NumberOfDPP[], + unsigned int dpte_group_bytes[], + double HostVMInefficiencyFactor, + double HostVMMinPageSize, + unsigned int HostVMMaxNonCachedPageTableLevels); + +void dml32_CalculateVUpdateAndDynamicMetadataParameters( + unsigned int MaxInterDCNTileRepeaters, + double Dppclk, + double Dispclk, + double DCFClkDeepSleep, + double PixelClock, + unsigned int HTotal, + unsigned int VBlank, + unsigned int DynamicMetadataTransmittedBytes, + unsigned int DynamicMetadataLinesBeforeActiveRequired, + unsigned int InterlaceEnable, + bool ProgressiveToInterlaceUnitInOPP, + double *TSetup, + double *Tdmbf, + double *Tdmec, + double *Tdmsks, + unsigned int *VUpdateOffsetPix, + double *VUpdateWidthPix, + double *VReadyOffsetPix); + +double dml32_CalculateTWait( + unsigned int PrefetchMode, + enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange, + bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal, + bool DRRDisplay, + double DRAMClockChangeLatency, + double FCLKChangeLatency, + double UrgentLatency, + double SREnterPlusExitTime); + +double dml32_get_return_bw_mbps(const soc_bounding_box_st *soc, + const int VoltageLevel, + const bool HostVMEnable, + const double DCFCLK, + const double FabricClock, + const double DRAMSpeed); + +double dml32_get_return_bw_mbps_vm_only(const soc_bounding_box_st *soc, + const int VoltageLevel, + const double DCFCLK, + const double FabricClock, + const double DRAMSpeed); + +double dml32_CalculateExtraLatency( + unsigned int RoundTripPingLatencyCycles, + unsigned int ReorderingBytes, + double DCFCLK, + unsigned int TotalNumberOfActiveDPP, + unsigned int PixelChunkSizeInKByte, + unsigned int TotalNumberOfDCCActiveDPP, + unsigned int MetaChunkSize, + double ReturnBW, + bool GPUVMEnable, + bool HostVMEnable, + unsigned int NumberOfActiveSurfaces, + unsigned int NumberOfDPP[], + unsigned int dpte_group_bytes[], + double HostVMInefficiencyFactor, + double HostVMMinPageSize, + unsigned int HostVMMaxNonCachedPageTableLevels); + +bool dml32_CalculatePrefetchSchedule( + struct vba_vars_st *v, + unsigned int k, + double HostVMInefficiencyFactor, + DmlPipe *myPipe, + unsigned int DSCDelay, + unsigned int DPP_RECOUT_WIDTH, + unsigned int VStartup, + unsigned int MaxVStartup, + double UrgentLatency, + double UrgentExtraLatency, + double TCalc, + unsigned int PDEAndMetaPTEBytesFrame, + unsigned int MetaRowByte, + unsigned int PixelPTEBytesPerRow, + double PrefetchSourceLinesY, + unsigned int SwathWidthY, + unsigned int VInitPreFillY, + unsigned int MaxNumSwathY, + double PrefetchSourceLinesC, + unsigned int SwathWidthC, + unsigned int VInitPreFillC, + unsigned int MaxNumSwathC, + unsigned int swath_width_luma_ub, + unsigned int swath_width_chroma_ub, + unsigned int SwathHeightY, + unsigned int SwathHeightC, + double TWait, + double TPreReq, + /* Output */ + double *DSTXAfterScaler, + double *DSTYAfterScaler, + double *DestinationLinesForPrefetch, + double *PrefetchBandwidth, + double *DestinationLinesToRequestVMInVBlank, + double *DestinationLinesToRequestRowInVBlank, + double *VRatioPrefetchY, + double *VRatioPrefetchC, + double *RequiredPrefetchPixDataBWLuma, + double *RequiredPrefetchPixDataBWChroma, + bool *NotEnoughTimeForDynamicMetadata, + double *Tno_bw, + double *prefetch_vmrow_bw, + double *Tdmdl_vm, + double *Tdmdl, + double *TSetup, + unsigned int *VUpdateOffsetPix, + double *VUpdateWidthPix, + double *VReadyOffsetPix); + +void dml32_CalculateFlipSchedule( + double HostVMInefficiencyFactor, + double UrgentExtraLatency, + double UrgentLatency, + unsigned int GPUVMMaxPageTableLevels, + bool HostVMEnable, + unsigned int HostVMMaxNonCachedPageTableLevels, + bool GPUVMEnable, + double HostVMMinPageSize, + double PDEAndMetaPTEBytesPerFrame, + double MetaRowBytes, + double DPTEBytesPerRow, + double BandwidthAvailableForImmediateFlip, + unsigned int TotImmediateFlipBytes, + enum source_format_class SourcePixelFormat, + double LineTime, + double VRatio, + double VRatioChroma, + double Tno_bw, + bool DCCEnable, + unsigned int dpte_row_height, + unsigned int meta_row_height, + unsigned int dpte_row_height_chroma, + unsigned int meta_row_height_chroma, + bool use_one_row_for_frame_flip, + + /* Output */ + double *DestinationLinesToRequestVMInImmediateFlip, + double *DestinationLinesToRequestRowInImmediateFlip, + double *final_flip_bw, + bool *ImmediateFlipSupportedForPipe); + +void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( + struct vba_vars_st *v, + unsigned int PrefetchMode, + double DCFCLK, + double ReturnBW, + SOCParametersList mmSOCParameters, + double SOCCLK, + double DCFClkDeepSleep, + unsigned int DETBufferSizeY[], + unsigned int DETBufferSizeC[], + unsigned int SwathHeightY[], + unsigned int SwathHeightC[], + double SwathWidthY[], + double SwathWidthC[], + unsigned int DPPPerSurface[], + double BytePerPixelDETY[], + double BytePerPixelDETC[], + double DSTXAfterScaler[], + double DSTYAfterScaler[], + bool UnboundedRequestEnabled, + unsigned int CompressedBufferSizeInkByte, + + /* Output */ + enum clock_change_support *DRAMClockChangeSupport, + double MaxActiveDRAMClockChangeLatencySupported[], + unsigned int SubViewportLinesNeededInMALL[], + enum dm_fclock_change_support *FCLKChangeSupport, + double *MinActiveFCLKChangeLatencySupported, + bool *USRRetrainingSupport, + double ActiveDRAMClockChangeLatencyMargin[]); + +double dml32_CalculateWriteBackDISPCLK( + enum source_format_class WritebackPixelFormat, + double PixelClock, + double WritebackHRatio, + double WritebackVRatio, + unsigned int WritebackHTaps, + unsigned int WritebackVTaps, + unsigned int WritebackSourceWidth, + unsigned int WritebackDestinationWidth, + unsigned int HTotal, + unsigned int WritebackLineBufferSize, + double DISPCLKDPPCLKVCOSpeed); + +void dml32_CalculateMinAndMaxPrefetchMode( + enum dm_prefetch_modes AllowForPStateChangeOrStutterInVBlankFinal, + unsigned int *MinPrefetchMode, + unsigned int *MaxPrefetchMode); + +void dml32_CalculatePixelDeliveryTimes( + unsigned int NumberOfActiveSurfaces, + double VRatio[], + double VRatioChroma[], + double VRatioPrefetchY[], + double VRatioPrefetchC[], + unsigned int swath_width_luma_ub[], + unsigned int swath_width_chroma_ub[], + unsigned int DPPPerSurface[], + double HRatio[], + double HRatioChroma[], + double PixelClock[], + double PSCL_THROUGHPUT[], + double PSCL_THROUGHPUT_CHROMA[], + double Dppclk[], + unsigned int BytePerPixelC[], + enum dm_rotation_angle SourceRotation[], + unsigned int NumberOfCursors[], + unsigned int CursorWidth[][DC__NUM_CURSOR__MAX], + unsigned int CursorBPP[][DC__NUM_CURSOR__MAX], + unsigned int BlockWidth256BytesY[], + unsigned int BlockHeight256BytesY[], + unsigned int BlockWidth256BytesC[], + unsigned int BlockHeight256BytesC[], + + /* Output */ + double DisplayPipeLineDeliveryTimeLuma[], + double DisplayPipeLineDeliveryTimeChroma[], + double DisplayPipeLineDeliveryTimeLumaPrefetch[], + double DisplayPipeLineDeliveryTimeChromaPrefetch[], + double DisplayPipeRequestDeliveryTimeLuma[], + double DisplayPipeRequestDeliveryTimeChroma[], + double DisplayPipeRequestDeliveryTimeLumaPrefetch[], + double DisplayPipeRequestDeliveryTimeChromaPrefetch[], + double CursorRequestDeliveryTime[], + double CursorRequestDeliveryTimePrefetch[]); + +void dml32_CalculateMetaAndPTETimes( + bool use_one_row_for_frame[], + unsigned int NumberOfActiveSurfaces, + bool GPUVMEnable, + unsigned int MetaChunkSize, + unsigned int MinMetaChunkSizeBytes, + unsigned int HTotal[], + double VRatio[], + double VRatioChroma[], + double DestinationLinesToRequestRowInVBlank[], + double DestinationLinesToRequestRowInImmediateFlip[], + bool DCCEnable[], + double PixelClock[], + unsigned int BytePerPixelY[], + unsigned int BytePerPixelC[], + enum dm_rotation_angle SourceRotation[], + unsigned int dpte_row_height[], + unsigned int dpte_row_height_chroma[], + unsigned int meta_row_width[], + unsigned int meta_row_width_chroma[], + unsigned int meta_row_height[], + unsigned int meta_row_height_chroma[], + unsigned int meta_req_width[], + unsigned int meta_req_width_chroma[], + unsigned int meta_req_height[], + unsigned int meta_req_height_chroma[], + unsigned int dpte_group_bytes[], + unsigned int PTERequestSizeY[], + unsigned int PTERequestSizeC[], + unsigned int PixelPTEReqWidthY[], + unsigned int PixelPTEReqHeightY[], + unsigned int PixelPTEReqWidthC[], + unsigned int PixelPTEReqHeightC[], + unsigned int dpte_row_width_luma_ub[], + unsigned int dpte_row_width_chroma_ub[], + + /* Output */ + double DST_Y_PER_PTE_ROW_NOM_L[], + double DST_Y_PER_PTE_ROW_NOM_C[], + double DST_Y_PER_META_ROW_NOM_L[], + double DST_Y_PER_META_ROW_NOM_C[], + double TimePerMetaChunkNominal[], + double TimePerChromaMetaChunkNominal[], + double TimePerMetaChunkVBlank[], + double TimePerChromaMetaChunkVBlank[], + double TimePerMetaChunkFlip[], + double TimePerChromaMetaChunkFlip[], + double time_per_pte_group_nom_luma[], + double time_per_pte_group_vblank_luma[], + double time_per_pte_group_flip_luma[], + double time_per_pte_group_nom_chroma[], + double time_per_pte_group_vblank_chroma[], + double time_per_pte_group_flip_chroma[]); + +void dml32_CalculateVMGroupAndRequestTimes( + unsigned int NumberOfActiveSurfaces, + bool GPUVMEnable, + unsigned int GPUVMMaxPageTableLevels, + unsigned int HTotal[], + unsigned int BytePerPixelC[], + double DestinationLinesToRequestVMInVBlank[], + double DestinationLinesToRequestVMInImmediateFlip[], + bool DCCEnable[], + double PixelClock[], + unsigned int dpte_row_width_luma_ub[], + unsigned int dpte_row_width_chroma_ub[], + unsigned int vm_group_bytes[], + unsigned int dpde0_bytes_per_frame_ub_l[], + unsigned int dpde0_bytes_per_frame_ub_c[], + unsigned int meta_pte_bytes_per_frame_ub_l[], + unsigned int meta_pte_bytes_per_frame_ub_c[], + + /* Output */ + double TimePerVMGroupVBlank[], + double TimePerVMGroupFlip[], + double TimePerVMRequestVBlank[], + double TimePerVMRequestFlip[]); + +void dml32_CalculateDCCConfiguration( + bool DCCEnabled, + bool DCCProgrammingAssumesScanDirectionUnknown, + enum source_format_class SourcePixelFormat, + unsigned int SurfaceWidthLuma, + unsigned int SurfaceWidthChroma, + unsigned int SurfaceHeightLuma, + unsigned int SurfaceHeightChroma, + unsigned int nomDETInKByte, + unsigned int RequestHeight256ByteLuma, + unsigned int RequestHeight256ByteChroma, + enum dm_swizzle_mode TilingFormat, + unsigned int BytePerPixelY, + unsigned int BytePerPixelC, + double BytePerPixelDETY, + double BytePerPixelDETC, + enum dm_rotation_angle SourceRotation, + /* Output */ + unsigned int *MaxUncompressedBlockLuma, + unsigned int *MaxUncompressedBlockChroma, + unsigned int *MaxCompressedBlockLuma, + unsigned int *MaxCompressedBlockChroma, + unsigned int *IndependentBlockLuma, + unsigned int *IndependentBlockChroma); + +void dml32_CalculateStutterEfficiency( + unsigned int CompressedBufferSizeInkByte, + enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], + bool UnboundedRequestEnabled, + unsigned int MetaFIFOSizeInKEntries, + unsigned int ZeroSizeBufferEntries, + unsigned int PixelChunkSizeInKByte, + unsigned int NumberOfActiveSurfaces, + unsigned int ROBBufferSizeInKByte, + double TotalDataReadBandwidth, + double DCFCLK, + double ReturnBW, + unsigned int CompbufReservedSpace64B, + unsigned int CompbufReservedSpaceZs, + double SRExitTime, + double SRExitZ8Time, + bool SynchronizeTimingsFinal, + unsigned int BlendingAndTiming[], + double StutterEnterPlusExitWatermark, + double Z8StutterEnterPlusExitWatermark, + bool ProgressiveToInterlaceUnitInOPP, + bool Interlace[], + double MinTTUVBlank[], + unsigned int DPPPerSurface[], + unsigned int DETBufferSizeY[], + unsigned int BytePerPixelY[], + double BytePerPixelDETY[], + double SwathWidthY[], + unsigned int SwathHeightY[], + unsigned int SwathHeightC[], + double NetDCCRateLuma[], + double NetDCCRateChroma[], + double DCCFractionOfZeroSizeRequestsLuma[], + double DCCFractionOfZeroSizeRequestsChroma[], + unsigned int HTotal[], + unsigned int VTotal[], + double PixelClock[], + double VRatio[], + enum dm_rotation_angle SourceRotation[], + unsigned int BlockHeight256BytesY[], + unsigned int BlockWidth256BytesY[], + unsigned int BlockHeight256BytesC[], + unsigned int BlockWidth256BytesC[], + unsigned int DCCYMaxUncompressedBlock[], + unsigned int DCCCMaxUncompressedBlock[], + unsigned int VActive[], + bool DCCEnable[], + bool WritebackEnable[], + double ReadBandwidthSurfaceLuma[], + double ReadBandwidthSurfaceChroma[], + double meta_row_bw[], + double dpte_row_bw[], + + /* Output */ + double *StutterEfficiencyNotIncludingVBlank, + double *StutterEfficiency, + unsigned int *NumberOfStutterBurstsPerFrame, + double *Z8StutterEfficiencyNotIncludingVBlank, + double *Z8StutterEfficiency, + unsigned int *Z8NumberOfStutterBurstsPerFrame, + double *StutterPeriod, + bool *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE); + +void dml32_CalculateMaxDETAndMinCompressedBufferSize( + unsigned int ConfigReturnBufferSizeInKByte, + unsigned int ROBBufferSizeInKByte, + unsigned int MaxNumDPP, + bool nomDETInKByteOverrideEnable, // VBA_DELTA, allow DV to override default DET size + unsigned int nomDETInKByteOverrideValue, // VBA_DELTA + + /* Output */ + unsigned int *MaxTotalDETInKByte, + unsigned int *nomDETInKByte, + unsigned int *MinCompressedBufferSizeInKByte); + +bool dml32_CalculateVActiveBandwithSupport(unsigned int NumberOfActiveSurfaces, + double ReturnBW, + bool NotUrgentLatencyHiding[], + double ReadBandwidthLuma[], + double ReadBandwidthChroma[], + double cursor_bw[], + double meta_row_bandwidth[], + double dpte_row_bandwidth[], + unsigned int NumberOfDPP[], + double UrgentBurstFactorLuma[], + double UrgentBurstFactorChroma[], + double UrgentBurstFactorCursor[]); + +void dml32_CalculatePrefetchBandwithSupport(unsigned int NumberOfActiveSurfaces, + double ReturnBW, + bool NotUrgentLatencyHiding[], + double ReadBandwidthLuma[], + double ReadBandwidthChroma[], + double PrefetchBandwidthLuma[], + double PrefetchBandwidthChroma[], + double cursor_bw[], + double meta_row_bandwidth[], + double dpte_row_bandwidth[], + double cursor_bw_pre[], + double prefetch_vmrow_bw[], + unsigned int NumberOfDPP[], + double UrgentBurstFactorLuma[], + double UrgentBurstFactorChroma[], + double UrgentBurstFactorCursor[], + double UrgentBurstFactorLumaPre[], + double UrgentBurstFactorChromaPre[], + double UrgentBurstFactorCursorPre[], + + /* output */ + double *PrefetchBandwidth, + double *FractionOfUrgentBandwidth, + bool *PrefetchBandwidthSupport); + +double dml32_CalculateBandwidthAvailableForImmediateFlip(unsigned int NumberOfActiveSurfaces, + double ReturnBW, + double ReadBandwidthLuma[], + double ReadBandwidthChroma[], + double PrefetchBandwidthLuma[], + double PrefetchBandwidthChroma[], + double cursor_bw[], + double cursor_bw_pre[], + unsigned int NumberOfDPP[], + double UrgentBurstFactorLuma[], + double UrgentBurstFactorChroma[], + double UrgentBurstFactorCursor[], + double UrgentBurstFactorLumaPre[], + double UrgentBurstFactorChromaPre[], + double UrgentBurstFactorCursorPre[]); + +void dml32_CalculateImmediateFlipBandwithSupport(unsigned int NumberOfActiveSurfaces, + double ReturnBW, + enum immediate_flip_requirement ImmediateFlipRequirement[], + double final_flip_bw[], + double ReadBandwidthLuma[], + double ReadBandwidthChroma[], + double PrefetchBandwidthLuma[], + double PrefetchBandwidthChroma[], + double cursor_bw[], + double meta_row_bandwidth[], + double dpte_row_bandwidth[], + double cursor_bw_pre[], + double prefetch_vmrow_bw[], + unsigned int NumberOfDPP[], + double UrgentBurstFactorLuma[], + double UrgentBurstFactorChroma[], + double UrgentBurstFactorCursor[], + double UrgentBurstFactorLumaPre[], + double UrgentBurstFactorChromaPre[], + double UrgentBurstFactorCursorPre[], + + /* output */ + double *TotalBandwidth, + double *FractionOfUrgentBandwidth, + bool *ImmediateFlipBandwidthSupport); + +bool dml32_CalculateDETSwathFillLatencyHiding(unsigned int NumberOfActiveSurfaces, + double ReturnBW, + double UrgentLatency, + unsigned int SwathHeightY[], + unsigned int SwathHeightC[], + unsigned int SwathWidthY[], + unsigned int SwathWidthC[], + double BytePerPixelInDETY[], + double BytePerPixelInDETC[], + unsigned int DETBufferSizeY[], + unsigned int DETBufferSizeC[], + unsigned int NumOfDPP[], + unsigned int HTotal[], + double PixelClock[], + double VRatioY[], + double VRatioC[], + enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[DC__NUM_DPP__MAX]); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_rq_dlg_calc_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_rq_dlg_calc_32.c new file mode 100644 index 000000000..9ba6cb676 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_rq_dlg_calc_32.c @@ -0,0 +1,615 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + + +#include "../display_mode_lib.h" +#include "../display_mode_vba.h" +#include "../dml_inline_defs.h" +#include "display_rq_dlg_calc_32.h" + +static bool is_dual_plane(enum source_format_class source_format) +{ + bool ret_val = 0; + + if ((source_format == dm_420_12) || (source_format == dm_420_8) || (source_format == dm_420_10) + || (source_format == dm_rgbe_alpha)) + ret_val = 1; + + return ret_val; +} + +void dml32_rq_dlg_get_rq_reg(display_rq_regs_st *rq_regs, + struct display_mode_lib *mode_lib, + const display_e2e_pipe_params_st *e2e_pipe_param, + const unsigned int num_pipes, + const unsigned int pipe_idx) +{ + const display_pipe_source_params_st *src = &e2e_pipe_param[pipe_idx].pipe.src; + bool dual_plane = is_dual_plane((enum source_format_class) (src->source_format)); + double stored_swath_l_bytes; + double stored_swath_c_bytes; + bool is_phantom_pipe; + uint32_t pixel_chunk_bytes = 0; + uint32_t min_pixel_chunk_bytes = 0; + uint32_t meta_chunk_bytes = 0; + uint32_t min_meta_chunk_bytes = 0; + uint32_t dpte_group_bytes = 0; + uint32_t mpte_group_bytes = 0; + + uint32_t p1_pixel_chunk_bytes = 0; + uint32_t p1_min_pixel_chunk_bytes = 0; + uint32_t p1_meta_chunk_bytes = 0; + uint32_t p1_min_meta_chunk_bytes = 0; + uint32_t p1_dpte_group_bytes = 0; + uint32_t p1_mpte_group_bytes = 0; + + unsigned int detile_buf_size_in_bytes; + unsigned int detile_buf_plane1_addr; + unsigned int pte_row_height_linear; + + memset(rq_regs, 0, sizeof(*rq_regs)); + + dml_print("DML_DLG::%s: Calculation for pipe[%d] start, num_pipes=%d\n", __func__, pipe_idx, num_pipes); + + pixel_chunk_bytes = get_pixel_chunk_size_in_kbyte(mode_lib, e2e_pipe_param, num_pipes) * 1024; // From VBA + min_pixel_chunk_bytes = get_min_pixel_chunk_size_in_byte(mode_lib, e2e_pipe_param, num_pipes); // From VBA + + if (pixel_chunk_bytes == 64 * 1024) + min_pixel_chunk_bytes = 0; + + meta_chunk_bytes = get_meta_chunk_size_in_kbyte(mode_lib, e2e_pipe_param, num_pipes) * 1024; // From VBA + min_meta_chunk_bytes = get_min_meta_chunk_size_in_byte(mode_lib, e2e_pipe_param, num_pipes); // From VBA + + dpte_group_bytes = get_dpte_group_size_in_bytes(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA + mpte_group_bytes = get_vm_group_size_in_bytes(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA + + p1_pixel_chunk_bytes = pixel_chunk_bytes; + p1_min_pixel_chunk_bytes = min_pixel_chunk_bytes; + p1_meta_chunk_bytes = meta_chunk_bytes; + p1_min_meta_chunk_bytes = min_meta_chunk_bytes; + p1_dpte_group_bytes = dpte_group_bytes; + p1_mpte_group_bytes = mpte_group_bytes; + + if ((enum source_format_class) src->source_format == dm_rgbe_alpha) + p1_pixel_chunk_bytes = get_alpha_pixel_chunk_size_in_kbyte(mode_lib, e2e_pipe_param, num_pipes) * 1024; + + rq_regs->rq_regs_l.chunk_size = dml_log2(pixel_chunk_bytes) - 10; + rq_regs->rq_regs_c.chunk_size = dml_log2(p1_pixel_chunk_bytes) - 10; + + if (min_pixel_chunk_bytes == 0) + rq_regs->rq_regs_l.min_chunk_size = 0; + else + rq_regs->rq_regs_l.min_chunk_size = dml_log2(min_pixel_chunk_bytes) - 8 + 1; + + if (p1_min_pixel_chunk_bytes == 0) + rq_regs->rq_regs_c.min_chunk_size = 0; + else + rq_regs->rq_regs_c.min_chunk_size = dml_log2(p1_min_pixel_chunk_bytes) - 8 + 1; + + rq_regs->rq_regs_l.meta_chunk_size = dml_log2(meta_chunk_bytes) - 10; + rq_regs->rq_regs_c.meta_chunk_size = dml_log2(p1_meta_chunk_bytes) - 10; + + if (min_meta_chunk_bytes == 0) + rq_regs->rq_regs_l.min_meta_chunk_size = 0; + else + rq_regs->rq_regs_l.min_meta_chunk_size = dml_log2(min_meta_chunk_bytes) - 6 + 1; + + if (p1_min_meta_chunk_bytes == 0) + rq_regs->rq_regs_c.min_meta_chunk_size = 0; + else + rq_regs->rq_regs_c.min_meta_chunk_size = dml_log2(p1_min_meta_chunk_bytes) - 6 + 1; + + rq_regs->rq_regs_l.dpte_group_size = dml_log2(dpte_group_bytes) - 6; + rq_regs->rq_regs_l.mpte_group_size = dml_log2(mpte_group_bytes) - 6; + rq_regs->rq_regs_c.dpte_group_size = dml_log2(p1_dpte_group_bytes) - 6; + rq_regs->rq_regs_c.mpte_group_size = dml_log2(p1_mpte_group_bytes) - 6; + + detile_buf_size_in_bytes = get_det_buffer_size_kbytes(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * 1024; + detile_buf_plane1_addr = 0; + pte_row_height_linear = get_dpte_row_height_linear_l(mode_lib, e2e_pipe_param, num_pipes, + pipe_idx); + + if (src->sw_mode == dm_sw_linear) + ASSERT(pte_row_height_linear >= 8); + + rq_regs->rq_regs_l.pte_row_height_linear = dml_floor(dml_log2(pte_row_height_linear), 1) - 3; + + if (dual_plane) { + unsigned int p1_pte_row_height_linear = get_dpte_row_height_linear_c(mode_lib, e2e_pipe_param, + num_pipes, pipe_idx); + ; + if (src->sw_mode == dm_sw_linear) + ASSERT(p1_pte_row_height_linear >= 8); + + rq_regs->rq_regs_c.pte_row_height_linear = dml_floor(dml_log2(p1_pte_row_height_linear), 1) - 3; + } + + rq_regs->rq_regs_l.swath_height = dml_log2(get_swath_height_l(mode_lib, e2e_pipe_param, num_pipes, pipe_idx)); + rq_regs->rq_regs_c.swath_height = dml_log2(get_swath_height_c(mode_lib, e2e_pipe_param, num_pipes, pipe_idx)); + + // FIXME: take the max between luma, chroma chunk size? + // okay for now, as we are setting pixel_chunk_bytes to 8kb anyways + if (pixel_chunk_bytes >= 32 * 1024 || (dual_plane && p1_pixel_chunk_bytes >= 32 * 1024)) { //32kb + rq_regs->drq_expansion_mode = 0; + } else { + rq_regs->drq_expansion_mode = 2; + } + rq_regs->prq_expansion_mode = 1; + rq_regs->mrq_expansion_mode = 1; + rq_regs->crq_expansion_mode = 1; + + stored_swath_l_bytes = get_det_stored_buffer_size_l_bytes(mode_lib, e2e_pipe_param, num_pipes, + pipe_idx); + stored_swath_c_bytes = get_det_stored_buffer_size_c_bytes(mode_lib, e2e_pipe_param, num_pipes, + pipe_idx); + is_phantom_pipe = get_is_phantom_pipe(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); + + // Note: detile_buf_plane1_addr is in unit of 1KB + if (dual_plane) { + if (is_phantom_pipe) { + detile_buf_plane1_addr = ((1024.0 * 1024.0) / 2.0 / 1024.0); // half to chroma + } else { + if (stored_swath_l_bytes / stored_swath_c_bytes <= 1.5) { + detile_buf_plane1_addr = (detile_buf_size_in_bytes / 2.0 / 1024.0); // half to chroma +#ifdef __DML_RQ_DLG_CALC_DEBUG__ + dml_print("DML_DLG: %s: detile_buf_plane1_addr = %d (1/2 to chroma)\n", + __func__, detile_buf_plane1_addr); +#endif + } else { + detile_buf_plane1_addr = + dml_round_to_multiple( + (unsigned int) ((2.0 * detile_buf_size_in_bytes) / 3.0), + 1024, 0) / 1024.0; // 2/3 to luma +#ifdef __DML_RQ_DLG_CALC_DEBUG__ + dml_print("DML_DLG: %s: detile_buf_plane1_addr = %d (1/3 chroma)\n", + __func__, detile_buf_plane1_addr); +#endif + } + } + } + rq_regs->plane1_base_address = detile_buf_plane1_addr; + +#ifdef __DML_RQ_DLG_CALC_DEBUG__ + dml_print("DML_DLG: %s: is_phantom_pipe = %d\n", __func__, is_phantom_pipe); + dml_print("DML_DLG: %s: stored_swath_l_bytes = %f\n", __func__, stored_swath_l_bytes); + dml_print("DML_DLG: %s: stored_swath_c_bytes = %f\n", __func__, stored_swath_c_bytes); + dml_print("DML_DLG: %s: detile_buf_size_in_bytes = %d\n", __func__, detile_buf_size_in_bytes); + dml_print("DML_DLG: %s: detile_buf_plane1_addr = %d\n", __func__, detile_buf_plane1_addr); + dml_print("DML_DLG: %s: plane1_base_address = %d\n", __func__, rq_regs->plane1_base_address); +#endif + print__rq_regs_st(mode_lib, rq_regs); + dml_print("DML_DLG::%s: Calculation for pipe[%d] done, num_pipes=%d\n", __func__, pipe_idx, num_pipes); +} + +void dml32_rq_dlg_get_dlg_reg(struct display_mode_lib *mode_lib, + display_dlg_regs_st *dlg_regs, + display_ttu_regs_st *ttu_regs, + display_e2e_pipe_params_st *e2e_pipe_param, + const unsigned int num_pipes, + const unsigned int pipe_idx) +{ + const display_pipe_source_params_st *src = &e2e_pipe_param[pipe_idx].pipe.src; + const display_pipe_dest_params_st *dst = &e2e_pipe_param[pipe_idx].pipe.dest; + const display_clocks_and_cfg_st *clks = &e2e_pipe_param[pipe_idx].clks_cfg; + double refcyc_per_req_delivery_pre_cur0 = 0.; + double refcyc_per_req_delivery_cur0 = 0.; + double refcyc_per_req_delivery_pre_c = 0.; + double refcyc_per_req_delivery_c = 0.; + double refcyc_per_req_delivery_pre_l; + double refcyc_per_req_delivery_l; + double refcyc_per_line_delivery_pre_c = 0.; + double refcyc_per_line_delivery_c = 0.; + double refcyc_per_line_delivery_pre_l; + double refcyc_per_line_delivery_l; + double min_ttu_vblank; + double vratio_pre_l; + double vratio_pre_c; + unsigned int min_dst_y_next_start; + unsigned int htotal = dst->htotal; + unsigned int hblank_end = dst->hblank_end; + unsigned int vblank_end = dst->vblank_end; + bool interlaced = dst->interlaced; + double pclk_freq_in_mhz = dst->pixel_rate_mhz; + unsigned int vready_after_vcount0; + double refclk_freq_in_mhz = clks->refclk_mhz; + double ref_freq_to_pix_freq = refclk_freq_in_mhz / pclk_freq_in_mhz; + bool dual_plane = 0; + unsigned int pipe_index_in_combine[DC__NUM_PIPES__MAX]; + unsigned int dst_x_after_scaler; + unsigned int dst_y_after_scaler; + double dst_y_prefetch; + double dst_y_per_vm_vblank; + double dst_y_per_row_vblank; + double dst_y_per_vm_flip; + double dst_y_per_row_flip; + double max_dst_y_per_vm_vblank = 32.0; + double max_dst_y_per_row_vblank = 16.0; + double dst_y_per_pte_row_nom_l; + double dst_y_per_pte_row_nom_c; + double dst_y_per_meta_row_nom_l; + double dst_y_per_meta_row_nom_c; + double refcyc_per_pte_group_nom_l; + double refcyc_per_pte_group_nom_c; + double refcyc_per_pte_group_vblank_l; + double refcyc_per_pte_group_vblank_c; + double refcyc_per_pte_group_flip_l; + double refcyc_per_pte_group_flip_c; + double refcyc_per_meta_chunk_nom_l; + double refcyc_per_meta_chunk_nom_c; + double refcyc_per_meta_chunk_vblank_l; + double refcyc_per_meta_chunk_vblank_c; + double refcyc_per_meta_chunk_flip_l; + double refcyc_per_meta_chunk_flip_c; + + memset(dlg_regs, 0, sizeof(*dlg_regs)); + memset(ttu_regs, 0, sizeof(*ttu_regs)); + dml_print("DML_DLG::%s: Calculation for pipe[%d] starts, num_pipes=%d\n", __func__, pipe_idx, num_pipes); + dml_print("DML_DLG: %s: refclk_freq_in_mhz = %3.2f\n", __func__, refclk_freq_in_mhz); + dml_print("DML_DLG: %s: pclk_freq_in_mhz = %3.2f\n", __func__, pclk_freq_in_mhz); + dml_print("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", __func__, ref_freq_to_pix_freq); + dml_print("DML_DLG: %s: interlaced = %d\n", __func__, interlaced); + ASSERT(ref_freq_to_pix_freq < 4.0); + + dlg_regs->ref_freq_to_pix_freq = (unsigned int) (ref_freq_to_pix_freq * dml_pow(2, 19)); + dlg_regs->refcyc_per_htotal = (unsigned int) (ref_freq_to_pix_freq * (double) htotal * dml_pow(2, 8)); + dlg_regs->dlg_vblank_end = interlaced ? (vblank_end / 2) : vblank_end; // 15 bits + + min_ttu_vblank = get_min_ttu_vblank_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA + min_dst_y_next_start = get_min_dst_y_next_start(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); + + dml_print("DML_DLG: %s: min_ttu_vblank (us) = %3.2f\n", __func__, min_ttu_vblank); + dml_print("DML_DLG: %s: min_dst_y_next_start = %d\n", __func__, min_dst_y_next_start); + dml_print("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", __func__, ref_freq_to_pix_freq); + + dual_plane = is_dual_plane((enum source_format_class) (src->source_format)); + + vready_after_vcount0 = get_vready_at_or_after_vsync(mode_lib, e2e_pipe_param, num_pipes, + pipe_idx); // From VBA + dlg_regs->vready_after_vcount0 = vready_after_vcount0; + + dml_print("DML_DLG: %s: vready_after_vcount0 = %d\n", __func__, dlg_regs->vready_after_vcount0); + + dst_x_after_scaler = dml_ceil(get_dst_x_after_scaler(mode_lib, e2e_pipe_param, num_pipes, pipe_idx), 1); + dst_y_after_scaler = dml_ceil(get_dst_y_after_scaler(mode_lib, e2e_pipe_param, num_pipes, pipe_idx), 1); + + // do some adjustment on the dst_after scaler to account for odm combine mode + dml_print("DML_DLG: %s: input dst_x_after_scaler = %d\n", __func__, dst_x_after_scaler); + dml_print("DML_DLG: %s: input dst_y_after_scaler = %d\n", __func__, dst_y_after_scaler); + + // need to figure out which side of odm combine we're in + if (dst->odm_combine == dm_odm_combine_mode_2to1 || dst->odm_combine == dm_odm_combine_mode_4to1) { + // figure out which pipes go together + bool visited[DC__NUM_PIPES__MAX]; + unsigned int i, j, k; + + for (k = 0; k < num_pipes; ++k) { + visited[k] = false; + pipe_index_in_combine[k] = 0; + } + + for (i = 0; i < num_pipes; i++) { + if (e2e_pipe_param[i].pipe.src.is_hsplit && !visited[i]) { + + unsigned int grp = e2e_pipe_param[i].pipe.src.hsplit_grp; + unsigned int grp_idx = 0; + + for (j = i; j < num_pipes; j++) { + if (e2e_pipe_param[j].pipe.src.hsplit_grp == grp + && e2e_pipe_param[j].pipe.src.is_hsplit && !visited[j]) { + pipe_index_in_combine[j] = grp_idx; + dml_print("DML_DLG: %s: pipe[%d] is in grp %d idx %d\n", + __func__, j, grp, grp_idx); + grp_idx++; + visited[j] = true; + } + } + } + } + } + + if (dst->odm_combine == dm_odm_combine_mode_disabled) { + // FIXME how about ODM split?? + dlg_regs->refcyc_h_blank_end = (unsigned int) ((double) hblank_end * ref_freq_to_pix_freq); + } else { + if (dst->odm_combine == dm_odm_combine_mode_2to1 || dst->odm_combine == dm_odm_combine_mode_4to1) { + // TODO: We should really check that 4to1 is supported before setting it to 4 + unsigned int odm_combine_factor = (dst->odm_combine == dm_odm_combine_mode_2to1 ? 2 : 4); + unsigned int odm_pipe_index = pipe_index_in_combine[pipe_idx]; + + dlg_regs->refcyc_h_blank_end = (unsigned int) (((double) hblank_end + + odm_pipe_index * (double) dst->hactive / odm_combine_factor) * ref_freq_to_pix_freq); + } + } + ASSERT(dlg_regs->refcyc_h_blank_end < (unsigned int)dml_pow(2, 13)); + + dml_print("DML_DLG: %s: htotal= %d\n", __func__, htotal); + dml_print("DML_DLG: %s: dst_x_after_scaler[%d]= %d\n", __func__, pipe_idx, dst_x_after_scaler); + dml_print("DML_DLG: %s: dst_y_after_scaler[%d] = %d\n", __func__, pipe_idx, dst_y_after_scaler); + + dst_y_prefetch = get_dst_y_prefetch(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA + // From VBA + dst_y_per_vm_vblank = get_dst_y_per_vm_vblank(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); + // From VBA + dst_y_per_row_vblank = get_dst_y_per_row_vblank(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); + dst_y_per_vm_flip = get_dst_y_per_vm_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA + dst_y_per_row_flip = get_dst_y_per_row_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA + + // magic! + if (htotal <= 75) { + max_dst_y_per_vm_vblank = 100.0; + max_dst_y_per_row_vblank = 100.0; + } + + dml_print("DML_DLG: %s: dst_y_prefetch (after rnd) = %3.2f\n", __func__, dst_y_prefetch); + dml_print("DML_DLG: %s: dst_y_per_vm_flip = %3.2f\n", __func__, dst_y_per_vm_flip); + dml_print("DML_DLG: %s: dst_y_per_row_flip = %3.2f\n", __func__, dst_y_per_row_flip); + dml_print("DML_DLG: %s: dst_y_per_vm_vblank = %3.2f\n", __func__, dst_y_per_vm_vblank); + dml_print("DML_DLG: %s: dst_y_per_row_vblank = %3.2f\n", __func__, dst_y_per_row_vblank); + + ASSERT(dst_y_per_vm_vblank < max_dst_y_per_vm_vblank); + ASSERT(dst_y_per_row_vblank < max_dst_y_per_row_vblank); + ASSERT(dst_y_prefetch > (dst_y_per_vm_vblank + dst_y_per_row_vblank)); + + vratio_pre_l = get_vratio_prefetch_l(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA + vratio_pre_c = get_vratio_prefetch_c(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA + + dml_print("DML_DLG: %s: vratio_pre_l = %3.2f\n", __func__, vratio_pre_l); + dml_print("DML_DLG: %s: vratio_pre_c = %3.2f\n", __func__, vratio_pre_c); + + // Active + refcyc_per_line_delivery_pre_l = get_refcyc_per_line_delivery_pre_l_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA + refcyc_per_line_delivery_l = get_refcyc_per_line_delivery_l_in_us(mode_lib, e2e_pipe_param, num_pipes, + pipe_idx) * refclk_freq_in_mhz; // From VBA + + dml_print("DML_DLG: %s: refcyc_per_line_delivery_pre_l = %3.2f\n", __func__, refcyc_per_line_delivery_pre_l); + dml_print("DML_DLG: %s: refcyc_per_line_delivery_l = %3.2f\n", __func__, refcyc_per_line_delivery_l); + + if (dual_plane) { + refcyc_per_line_delivery_pre_c = get_refcyc_per_line_delivery_pre_c_in_us(mode_lib, e2e_pipe_param, + num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA + refcyc_per_line_delivery_c = get_refcyc_per_line_delivery_c_in_us(mode_lib, e2e_pipe_param, num_pipes, + pipe_idx) * refclk_freq_in_mhz; // From VBA + + dml_print("DML_DLG: %s: refcyc_per_line_delivery_pre_c = %3.2f\n", + __func__, refcyc_per_line_delivery_pre_c); + dml_print("DML_DLG: %s: refcyc_per_line_delivery_c = %3.2f\n", + __func__, refcyc_per_line_delivery_c); + } + + if (src->dynamic_metadata_enable && src->gpuvm) + dlg_regs->refcyc_per_vm_dmdata = get_refcyc_per_vm_dmdata_in_us(mode_lib, e2e_pipe_param, num_pipes, + pipe_idx) * refclk_freq_in_mhz; // From VBA + + dlg_regs->dmdata_dl_delta = get_dmdata_dl_delta_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) + * refclk_freq_in_mhz; // From VBA + + refcyc_per_req_delivery_pre_l = get_refcyc_per_req_delivery_pre_l_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA + refcyc_per_req_delivery_l = get_refcyc_per_req_delivery_l_in_us(mode_lib, e2e_pipe_param, num_pipes, + pipe_idx) * refclk_freq_in_mhz; // From VBA + + dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_l = %3.2f\n", __func__, refcyc_per_req_delivery_pre_l); + dml_print("DML_DLG: %s: refcyc_per_req_delivery_l = %3.2f\n", __func__, refcyc_per_req_delivery_l); + + if (dual_plane) { + refcyc_per_req_delivery_pre_c = get_refcyc_per_req_delivery_pre_c_in_us(mode_lib, e2e_pipe_param, + num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA + refcyc_per_req_delivery_c = get_refcyc_per_req_delivery_c_in_us(mode_lib, e2e_pipe_param, num_pipes, + pipe_idx) * refclk_freq_in_mhz; // From VBA + + dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_c = %3.2f\n", + __func__, refcyc_per_req_delivery_pre_c); + dml_print("DML_DLG: %s: refcyc_per_req_delivery_c = %3.2f\n", __func__, refcyc_per_req_delivery_c); + } + + // TTU - Cursor + ASSERT(src->num_cursors <= 1); + if (src->num_cursors > 0) { + refcyc_per_req_delivery_pre_cur0 = get_refcyc_per_cursor_req_delivery_pre_in_us(mode_lib, + e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA + refcyc_per_req_delivery_cur0 = get_refcyc_per_cursor_req_delivery_in_us(mode_lib, e2e_pipe_param, + num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA + + dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_cur0 = %3.2f\n", + __func__, refcyc_per_req_delivery_pre_cur0); + dml_print("DML_DLG: %s: refcyc_per_req_delivery_cur0 = %3.2f\n", + __func__, refcyc_per_req_delivery_cur0); + } + + // Assign to register structures + dlg_regs->min_dst_y_next_start = min_dst_y_next_start * dml_pow(2, 2); + ASSERT(dlg_regs->min_dst_y_next_start < (unsigned int)dml_pow(2, 18)); + + dlg_regs->dst_y_after_scaler = dst_y_after_scaler; // in terms of line + dlg_regs->refcyc_x_after_scaler = dst_x_after_scaler * ref_freq_to_pix_freq; // in terms of refclk + dlg_regs->dst_y_prefetch = (unsigned int) (dst_y_prefetch * dml_pow(2, 2)); + dlg_regs->dst_y_per_vm_vblank = (unsigned int) (dst_y_per_vm_vblank * dml_pow(2, 2)); + dlg_regs->dst_y_per_row_vblank = (unsigned int) (dst_y_per_row_vblank * dml_pow(2, 2)); + dlg_regs->dst_y_per_vm_flip = (unsigned int) (dst_y_per_vm_flip * dml_pow(2, 2)); + dlg_regs->dst_y_per_row_flip = (unsigned int) (dst_y_per_row_flip * dml_pow(2, 2)); + + dlg_regs->vratio_prefetch = (unsigned int) (vratio_pre_l * dml_pow(2, 19)); + dlg_regs->vratio_prefetch_c = (unsigned int) (vratio_pre_c * dml_pow(2, 19)); + + dml_print("DML_DLG: %s: dlg_regs->dst_y_per_vm_vblank = 0x%x\n", __func__, dlg_regs->dst_y_per_vm_vblank); + dml_print("DML_DLG: %s: dlg_regs->dst_y_per_row_vblank = 0x%x\n", __func__, dlg_regs->dst_y_per_row_vblank); + dml_print("DML_DLG: %s: dlg_regs->dst_y_per_vm_flip = 0x%x\n", __func__, dlg_regs->dst_y_per_vm_flip); + dml_print("DML_DLG: %s: dlg_regs->dst_y_per_row_flip = 0x%x\n", __func__, dlg_regs->dst_y_per_row_flip); + + dlg_regs->refcyc_per_vm_group_vblank = get_refcyc_per_vm_group_vblank_in_us(mode_lib, e2e_pipe_param, + num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA + dlg_regs->refcyc_per_vm_group_flip = get_refcyc_per_vm_group_flip_in_us(mode_lib, e2e_pipe_param, num_pipes, + pipe_idx) * refclk_freq_in_mhz; // From VBA + dlg_regs->refcyc_per_vm_req_vblank = get_refcyc_per_vm_req_vblank_in_us(mode_lib, e2e_pipe_param, num_pipes, + pipe_idx) * refclk_freq_in_mhz * dml_pow(2, 10); // From VBA + dlg_regs->refcyc_per_vm_req_flip = get_refcyc_per_vm_req_flip_in_us(mode_lib, e2e_pipe_param, num_pipes, + pipe_idx) * refclk_freq_in_mhz * dml_pow(2, 10); // From VBA + + // From VBA + dst_y_per_pte_row_nom_l = get_dst_y_per_pte_row_nom_l(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); + // From VBA + dst_y_per_pte_row_nom_c = get_dst_y_per_pte_row_nom_c(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); + // From VBA + dst_y_per_meta_row_nom_l = get_dst_y_per_meta_row_nom_l(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); + // From VBA + dst_y_per_meta_row_nom_c = get_dst_y_per_meta_row_nom_c(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); + + refcyc_per_pte_group_nom_l = get_refcyc_per_pte_group_nom_l_in_us(mode_lib, e2e_pipe_param, num_pipes, + pipe_idx) * refclk_freq_in_mhz; // From VBA + refcyc_per_pte_group_nom_c = get_refcyc_per_pte_group_nom_c_in_us(mode_lib, e2e_pipe_param, num_pipes, + pipe_idx) * refclk_freq_in_mhz; // From VBA + refcyc_per_pte_group_vblank_l = get_refcyc_per_pte_group_vblank_l_in_us(mode_lib, e2e_pipe_param, + num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA + refcyc_per_pte_group_vblank_c = get_refcyc_per_pte_group_vblank_c_in_us(mode_lib, e2e_pipe_param, + num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA + refcyc_per_pte_group_flip_l = get_refcyc_per_pte_group_flip_l_in_us(mode_lib, e2e_pipe_param, num_pipes, + pipe_idx) * refclk_freq_in_mhz; // From VBA + refcyc_per_pte_group_flip_c = get_refcyc_per_pte_group_flip_c_in_us(mode_lib, e2e_pipe_param, num_pipes, + pipe_idx) * refclk_freq_in_mhz; // From VBA + + refcyc_per_meta_chunk_nom_l = get_refcyc_per_meta_chunk_nom_l_in_us(mode_lib, e2e_pipe_param, num_pipes, + pipe_idx) * refclk_freq_in_mhz; // From VBA + refcyc_per_meta_chunk_nom_c = get_refcyc_per_meta_chunk_nom_c_in_us(mode_lib, e2e_pipe_param, num_pipes, + pipe_idx) * refclk_freq_in_mhz; // From VBA + refcyc_per_meta_chunk_vblank_l = get_refcyc_per_meta_chunk_vblank_l_in_us(mode_lib, e2e_pipe_param, + num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA + refcyc_per_meta_chunk_vblank_c = get_refcyc_per_meta_chunk_vblank_c_in_us(mode_lib, e2e_pipe_param, + num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA + refcyc_per_meta_chunk_flip_l = get_refcyc_per_meta_chunk_flip_l_in_us(mode_lib, e2e_pipe_param, + num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA + refcyc_per_meta_chunk_flip_c = get_refcyc_per_meta_chunk_flip_c_in_us(mode_lib, e2e_pipe_param, + num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA + + dlg_regs->dst_y_per_pte_row_nom_l = dst_y_per_pte_row_nom_l * dml_pow(2, 2); + dlg_regs->dst_y_per_pte_row_nom_c = dst_y_per_pte_row_nom_c * dml_pow(2, 2); + dlg_regs->dst_y_per_meta_row_nom_l = dst_y_per_meta_row_nom_l * dml_pow(2, 2); + dlg_regs->dst_y_per_meta_row_nom_c = dst_y_per_meta_row_nom_c * dml_pow(2, 2); + dlg_regs->refcyc_per_pte_group_nom_l = refcyc_per_pte_group_nom_l; + dlg_regs->refcyc_per_pte_group_nom_c = refcyc_per_pte_group_nom_c; + dlg_regs->refcyc_per_pte_group_vblank_l = refcyc_per_pte_group_vblank_l; + dlg_regs->refcyc_per_pte_group_vblank_c = refcyc_per_pte_group_vblank_c; + dlg_regs->refcyc_per_pte_group_flip_l = refcyc_per_pte_group_flip_l; + dlg_regs->refcyc_per_pte_group_flip_c = refcyc_per_pte_group_flip_c; + dlg_regs->refcyc_per_meta_chunk_nom_l = refcyc_per_meta_chunk_nom_l; + dlg_regs->refcyc_per_meta_chunk_nom_c = refcyc_per_meta_chunk_nom_c; + dlg_regs->refcyc_per_meta_chunk_vblank_l = refcyc_per_meta_chunk_vblank_l; + dlg_regs->refcyc_per_meta_chunk_vblank_c = refcyc_per_meta_chunk_vblank_c; + dlg_regs->refcyc_per_meta_chunk_flip_l = refcyc_per_meta_chunk_flip_l; + dlg_regs->refcyc_per_meta_chunk_flip_c = refcyc_per_meta_chunk_flip_c; + dlg_regs->refcyc_per_line_delivery_pre_l = (unsigned int) dml_floor(refcyc_per_line_delivery_pre_l, 1); + dlg_regs->refcyc_per_line_delivery_l = (unsigned int) dml_floor(refcyc_per_line_delivery_l, 1); + dlg_regs->refcyc_per_line_delivery_pre_c = (unsigned int) dml_floor(refcyc_per_line_delivery_pre_c, 1); + dlg_regs->refcyc_per_line_delivery_c = (unsigned int) dml_floor(refcyc_per_line_delivery_c, 1); + + dlg_regs->chunk_hdl_adjust_cur0 = 3; + dlg_regs->dst_y_offset_cur0 = 0; + dlg_regs->chunk_hdl_adjust_cur1 = 3; + dlg_regs->dst_y_offset_cur1 = 0; + + dlg_regs->dst_y_delta_drq_limit = 0x7fff; // off + + ttu_regs->refcyc_per_req_delivery_pre_l = (unsigned int) (refcyc_per_req_delivery_pre_l * dml_pow(2, 10)); + ttu_regs->refcyc_per_req_delivery_l = (unsigned int) (refcyc_per_req_delivery_l * dml_pow(2, 10)); + ttu_regs->refcyc_per_req_delivery_pre_c = (unsigned int) (refcyc_per_req_delivery_pre_c * dml_pow(2, 10)); + ttu_regs->refcyc_per_req_delivery_c = (unsigned int) (refcyc_per_req_delivery_c * dml_pow(2, 10)); + ttu_regs->refcyc_per_req_delivery_pre_cur0 = + (unsigned int) (refcyc_per_req_delivery_pre_cur0 * dml_pow(2, 10)); + ttu_regs->refcyc_per_req_delivery_cur0 = (unsigned int) (refcyc_per_req_delivery_cur0 * dml_pow(2, 10)); + ttu_regs->refcyc_per_req_delivery_pre_cur1 = 0; + ttu_regs->refcyc_per_req_delivery_cur1 = 0; + ttu_regs->qos_level_low_wm = 0; + + ttu_regs->qos_level_high_wm = (unsigned int) (4.0 * (double) htotal * ref_freq_to_pix_freq); + + ttu_regs->qos_level_flip = 14; + ttu_regs->qos_level_fixed_l = 8; + ttu_regs->qos_level_fixed_c = 8; + ttu_regs->qos_level_fixed_cur0 = 8; + ttu_regs->qos_ramp_disable_l = 0; + ttu_regs->qos_ramp_disable_c = 0; + ttu_regs->qos_ramp_disable_cur0 = 0; + ttu_regs->min_ttu_vblank = min_ttu_vblank * refclk_freq_in_mhz; + + // CHECK for HW registers' range, assert or clamp + ASSERT(refcyc_per_req_delivery_pre_l < dml_pow(2, 13)); + ASSERT(refcyc_per_req_delivery_l < dml_pow(2, 13)); + ASSERT(refcyc_per_req_delivery_pre_c < dml_pow(2, 13)); + ASSERT(refcyc_per_req_delivery_c < dml_pow(2, 13)); + if (dlg_regs->refcyc_per_vm_group_vblank >= (unsigned int) dml_pow(2, 23)) + dlg_regs->refcyc_per_vm_group_vblank = dml_pow(2, 23) - 1; + + if (dlg_regs->refcyc_per_vm_group_flip >= (unsigned int) dml_pow(2, 23)) + dlg_regs->refcyc_per_vm_group_flip = dml_pow(2, 23) - 1; + + if (dlg_regs->refcyc_per_vm_req_vblank >= (unsigned int) dml_pow(2, 23)) + dlg_regs->refcyc_per_vm_req_vblank = dml_pow(2, 23) - 1; + + if (dlg_regs->refcyc_per_vm_req_flip >= (unsigned int) dml_pow(2, 23)) + dlg_regs->refcyc_per_vm_req_flip = dml_pow(2, 23) - 1; + + ASSERT(dlg_regs->dst_y_after_scaler < (unsigned int) 8); + ASSERT(dlg_regs->refcyc_x_after_scaler < (unsigned int)dml_pow(2, 13)); + ASSERT(dlg_regs->dst_y_per_pte_row_nom_l < (unsigned int)dml_pow(2, 17)); + if (dual_plane) { + if (dlg_regs->dst_y_per_pte_row_nom_c >= (unsigned int) dml_pow(2, 17)) { + // FIXME what so special about chroma, can we just assert? + dml_print("DML_DLG: %s: Warning dst_y_per_pte_row_nom_c %u > register max U15.2 %u\n", + __func__, dlg_regs->dst_y_per_pte_row_nom_c, (unsigned int)dml_pow(2, 17) - 1); + } + } + ASSERT(dlg_regs->dst_y_per_meta_row_nom_l < (unsigned int)dml_pow(2, 17)); + ASSERT(dlg_regs->dst_y_per_meta_row_nom_c < (unsigned int)dml_pow(2, 17)); + + if (dlg_regs->refcyc_per_pte_group_nom_l >= (unsigned int) dml_pow(2, 23)) + dlg_regs->refcyc_per_pte_group_nom_l = dml_pow(2, 23) - 1; + if (dual_plane) { + if (dlg_regs->refcyc_per_pte_group_nom_c >= (unsigned int) dml_pow(2, 23)) + dlg_regs->refcyc_per_pte_group_nom_c = dml_pow(2, 23) - 1; + } + ASSERT(dlg_regs->refcyc_per_pte_group_vblank_l < (unsigned int)dml_pow(2, 13)); + if (dual_plane) { + ASSERT(dlg_regs->refcyc_per_pte_group_vblank_c < (unsigned int)dml_pow(2, 13)); + } + + if (dlg_regs->refcyc_per_meta_chunk_nom_l >= (unsigned int) dml_pow(2, 23)) + dlg_regs->refcyc_per_meta_chunk_nom_l = dml_pow(2, 23) - 1; + if (dual_plane) { + if (dlg_regs->refcyc_per_meta_chunk_nom_c >= (unsigned int) dml_pow(2, 23)) + dlg_regs->refcyc_per_meta_chunk_nom_c = dml_pow(2, 23) - 1; + } + ASSERT(dlg_regs->refcyc_per_meta_chunk_vblank_l < (unsigned int)dml_pow(2, 13)); + ASSERT(dlg_regs->refcyc_per_meta_chunk_vblank_c < (unsigned int)dml_pow(2, 13)); + ASSERT(dlg_regs->refcyc_per_line_delivery_pre_l < (unsigned int)dml_pow(2, 13)); + ASSERT(dlg_regs->refcyc_per_line_delivery_l < (unsigned int)dml_pow(2, 13)); + ASSERT(dlg_regs->refcyc_per_line_delivery_pre_c < (unsigned int)dml_pow(2, 13)); + ASSERT(dlg_regs->refcyc_per_line_delivery_c < (unsigned int)dml_pow(2, 13)); + ASSERT(ttu_regs->qos_level_low_wm < dml_pow(2, 14)); + ASSERT(ttu_regs->qos_level_high_wm < dml_pow(2, 14)); + ASSERT(ttu_regs->min_ttu_vblank < dml_pow(2, 24)); + + print__ttu_regs_st(mode_lib, ttu_regs); + print__dlg_regs_st(mode_lib, dlg_regs); + dml_print("DML_DLG::%s: Calculation for pipe[%d] done, num_pipes=%d\n", __func__, pipe_idx, num_pipes); +} + diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_rq_dlg_calc_32.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_rq_dlg_calc_32.h new file mode 100644 index 000000000..ebee36529 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_rq_dlg_calc_32.h @@ -0,0 +1,70 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DML32_DISPLAY_RQ_DLG_CALC_H__ +#define __DML32_DISPLAY_RQ_DLG_CALC_H__ + +#include "../display_rq_dlg_helpers.h" + +struct display_mode_lib; + +/* +* Function: dml_rq_dlg_get_rq_reg +* Main entry point for test to get the register values out of this DML class. +* This function calls <get_rq_param> and <extract_rq_regs> functions to calculate +* and then populate the rq_regs struct +* Input: +* pipe_param - pipe source configuration (e.g. vp, pitch, scaling, dest, etc.) +* Output: +* rq_regs - struct that holds all the RQ registers field value. +* See also: <display_rq_regs_st> +*/ +void dml32_rq_dlg_get_rq_reg(display_rq_regs_st *rq_regs, + struct display_mode_lib *mode_lib, + const display_e2e_pipe_params_st *e2e_pipe_param, + const unsigned int num_pipes, + const unsigned int pipe_idx); + +/* +* Function: dml_rq_dlg_get_dlg_reg +* Calculate and return DLG and TTU register struct given the system setting +* Output: +* dlg_regs - output DLG register struct +* ttu_regs - output DLG TTU register struct +* Input: +* e2e_pipe_param - "compacted" array of e2e pipe param struct +* num_pipes - num of active "pipe" or "route" +* pipe_idx - index that identifies the e2e_pipe_param that corresponding to this dlg +* cstate - 0: when calculate min_ttu_vblank it is assumed cstate is not required. 1: Normal mode, cstate is considered. +* Added for legacy or unrealistic timing tests. +*/ +void dml32_rq_dlg_get_dlg_reg(struct display_mode_lib *mode_lib, + display_dlg_regs_st *dlg_regs, + display_ttu_regs_st *ttu_regs, + display_e2e_pipe_params_st *e2e_pipe_param, + const unsigned int num_pipes, + const unsigned int pipe_idx); + +#endif |