diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
commit | 26a029d407be480d791972afb5975cf62c9360a6 (patch) | |
tree | f435a8308119effd964b339f76abb83a57c29483 /third_party/aom/av1/encoder/pickcdef.c | |
parent | Initial commit. (diff) | |
download | firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz firefox-26a029d407be480d791972afb5975cf62c9360a6.zip |
Adding upstream version 124.0.1.upstream/124.0.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/aom/av1/encoder/pickcdef.c')
-rw-r--r-- | third_party/aom/av1/encoder/pickcdef.c | 958 |
1 files changed, 958 insertions, 0 deletions
diff --git a/third_party/aom/av1/encoder/pickcdef.c b/third_party/aom/av1/encoder/pickcdef.c new file mode 100644 index 0000000000..232a2f9edb --- /dev/null +++ b/third_party/aom/av1/encoder/pickcdef.c @@ -0,0 +1,958 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include <math.h> +#include <stdbool.h> +#include <string.h> + +#include "config/aom_dsp_rtcd.h" +#include "config/aom_scale_rtcd.h" + +#include "aom/aom_integer.h" +#include "av1/common/av1_common_int.h" +#include "av1/common/reconinter.h" +#include "av1/encoder/encoder.h" +#include "av1/encoder/ethread.h" +#include "av1/encoder/pickcdef.h" +#include "av1/encoder/mcomp.h" + +// Get primary and secondary filter strength for the given strength index and +// search method +static INLINE void get_cdef_filter_strengths(CDEF_PICK_METHOD pick_method, + int *pri_strength, + int *sec_strength, + int strength_idx) { + const int tot_sec_filter = + (pick_method == CDEF_FAST_SEARCH_LVL5) + ? REDUCED_SEC_STRENGTHS_LVL5 + : ((pick_method >= CDEF_FAST_SEARCH_LVL3) ? 
REDUCED_SEC_STRENGTHS_LVL3 + : CDEF_SEC_STRENGTHS); + const int pri_idx = strength_idx / tot_sec_filter; + const int sec_idx = strength_idx % tot_sec_filter; + *pri_strength = pri_idx; + *sec_strength = sec_idx; + if (pick_method == CDEF_FULL_SEARCH) return; + + switch (pick_method) { + case CDEF_FAST_SEARCH_LVL1: + assert(pri_idx < REDUCED_PRI_STRENGTHS_LVL1); + *pri_strength = priconv_lvl1[pri_idx]; + break; + case CDEF_FAST_SEARCH_LVL2: + assert(pri_idx < REDUCED_PRI_STRENGTHS_LVL2); + *pri_strength = priconv_lvl2[pri_idx]; + break; + case CDEF_FAST_SEARCH_LVL3: + assert(pri_idx < REDUCED_PRI_STRENGTHS_LVL2); + assert(sec_idx < REDUCED_SEC_STRENGTHS_LVL3); + *pri_strength = priconv_lvl2[pri_idx]; + *sec_strength = secconv_lvl3[sec_idx]; + break; + case CDEF_FAST_SEARCH_LVL4: + assert(pri_idx < REDUCED_PRI_STRENGTHS_LVL4); + assert(sec_idx < REDUCED_SEC_STRENGTHS_LVL3); + *pri_strength = priconv_lvl4[pri_idx]; + *sec_strength = secconv_lvl3[sec_idx]; + break; + case CDEF_FAST_SEARCH_LVL5: + assert(pri_idx < REDUCED_PRI_STRENGTHS_LVL4); + assert(sec_idx < REDUCED_SEC_STRENGTHS_LVL5); + *pri_strength = priconv_lvl5[pri_idx]; + *sec_strength = secconv_lvl5[sec_idx]; + break; + default: assert(0 && "Invalid CDEF search method"); + } +} + +// Store CDEF filter strength calculated from strength index for given search +// method +#define STORE_CDEF_FILTER_STRENGTH(cdef_strength, pick_method, strength_idx) \ + do { \ + get_cdef_filter_strengths((pick_method), &pri_strength, &sec_strength, \ + (strength_idx)); \ + cdef_strength = pri_strength * CDEF_SEC_STRENGTHS + sec_strength; \ + } while (0) + +/* Search for the best strength to add as an option, knowing we + already selected nb_strengths options. 
*/ +static uint64_t search_one(int *lev, int nb_strengths, + uint64_t mse[][TOTAL_STRENGTHS], int sb_count, + CDEF_PICK_METHOD pick_method) { + uint64_t tot_mse[TOTAL_STRENGTHS]; + const int total_strengths = nb_cdef_strengths[pick_method]; + int i, j; + uint64_t best_tot_mse = (uint64_t)1 << 63; + int best_id = 0; + memset(tot_mse, 0, sizeof(tot_mse)); + for (i = 0; i < sb_count; i++) { + int gi; + uint64_t best_mse = (uint64_t)1 << 63; + /* Find best mse among already selected options. */ + for (gi = 0; gi < nb_strengths; gi++) { + if (mse[i][lev[gi]] < best_mse) { + best_mse = mse[i][lev[gi]]; + } + } + /* Find best mse when adding each possible new option. */ + for (j = 0; j < total_strengths; j++) { + uint64_t best = best_mse; + if (mse[i][j] < best) best = mse[i][j]; + tot_mse[j] += best; + } + } + for (j = 0; j < total_strengths; j++) { + if (tot_mse[j] < best_tot_mse) { + best_tot_mse = tot_mse[j]; + best_id = j; + } + } + lev[nb_strengths] = best_id; + return best_tot_mse; +} + +/* Search for the best luma+chroma strength to add as an option, knowing we + already selected nb_strengths options. */ +static uint64_t search_one_dual(int *lev0, int *lev1, int nb_strengths, + uint64_t (**mse)[TOTAL_STRENGTHS], int sb_count, + CDEF_PICK_METHOD pick_method) { + uint64_t tot_mse[TOTAL_STRENGTHS][TOTAL_STRENGTHS]; + int i, j; + uint64_t best_tot_mse = (uint64_t)1 << 63; + int best_id0 = 0; + int best_id1 = 0; + const int total_strengths = nb_cdef_strengths[pick_method]; + memset(tot_mse, 0, sizeof(tot_mse)); + for (i = 0; i < sb_count; i++) { + int gi; + uint64_t best_mse = (uint64_t)1 << 63; + /* Find best mse among already selected options. */ + for (gi = 0; gi < nb_strengths; gi++) { + uint64_t curr = mse[0][i][lev0[gi]]; + curr += mse[1][i][lev1[gi]]; + if (curr < best_mse) { + best_mse = curr; + } + } + /* Find best mse when adding each possible new option. 
*/ + for (j = 0; j < total_strengths; j++) { + int k; + for (k = 0; k < total_strengths; k++) { + uint64_t best = best_mse; + uint64_t curr = mse[0][i][j]; + curr += mse[1][i][k]; + if (curr < best) best = curr; + tot_mse[j][k] += best; + } + } + } + for (j = 0; j < total_strengths; j++) { + int k; + for (k = 0; k < total_strengths; k++) { + if (tot_mse[j][k] < best_tot_mse) { + best_tot_mse = tot_mse[j][k]; + best_id0 = j; + best_id1 = k; + } + } + } + lev0[nb_strengths] = best_id0; + lev1[nb_strengths] = best_id1; + return best_tot_mse; +} + +/* Search for the set of strengths that minimizes mse. */ +static uint64_t joint_strength_search(int *best_lev, int nb_strengths, + uint64_t mse[][TOTAL_STRENGTHS], + int sb_count, + CDEF_PICK_METHOD pick_method) { + uint64_t best_tot_mse; + int fast = (pick_method >= CDEF_FAST_SEARCH_LVL1 && + pick_method <= CDEF_FAST_SEARCH_LVL5); + int i; + best_tot_mse = (uint64_t)1 << 63; + /* Greedy search: add one strength options at a time. */ + for (i = 0; i < nb_strengths; i++) { + best_tot_mse = search_one(best_lev, i, mse, sb_count, pick_method); + } + /* Trying to refine the greedy search by reconsidering each + already-selected option. */ + if (!fast) { + for (i = 0; i < 4 * nb_strengths; i++) { + int j; + for (j = 0; j < nb_strengths - 1; j++) best_lev[j] = best_lev[j + 1]; + best_tot_mse = + search_one(best_lev, nb_strengths - 1, mse, sb_count, pick_method); + } + } + return best_tot_mse; +} + +/* Search for the set of luma+chroma strengths that minimizes mse. */ +static uint64_t joint_strength_search_dual(int *best_lev0, int *best_lev1, + int nb_strengths, + uint64_t (**mse)[TOTAL_STRENGTHS], + int sb_count, + CDEF_PICK_METHOD pick_method) { + uint64_t best_tot_mse; + int i; + best_tot_mse = (uint64_t)1 << 63; + /* Greedy search: add one strength options at a time. 
*/ + for (i = 0; i < nb_strengths; i++) { + best_tot_mse = + search_one_dual(best_lev0, best_lev1, i, mse, sb_count, pick_method); + } + /* Trying to refine the greedy search by reconsidering each + already-selected option. */ + for (i = 0; i < 4 * nb_strengths; i++) { + int j; + for (j = 0; j < nb_strengths - 1; j++) { + best_lev0[j] = best_lev0[j + 1]; + best_lev1[j] = best_lev1[j + 1]; + } + best_tot_mse = search_one_dual(best_lev0, best_lev1, nb_strengths - 1, mse, + sb_count, pick_method); + } + return best_tot_mse; +} + +static INLINE void init_src_params(int *src_stride, int *width, int *height, + int *width_log2, int *height_log2, + BLOCK_SIZE bsize) { + *src_stride = block_size_wide[bsize]; + *width = block_size_wide[bsize]; + *height = block_size_high[bsize]; + *width_log2 = MI_SIZE_LOG2 + mi_size_wide_log2[bsize]; + *height_log2 = MI_SIZE_LOG2 + mi_size_wide_log2[bsize]; +} +#if CONFIG_AV1_HIGHBITDEPTH +/* Compute MSE only on the blocks we filtered. */ +static uint64_t compute_cdef_dist_highbd(void *dst, int dstride, uint16_t *src, + cdef_list *dlist, int cdef_count, + BLOCK_SIZE bsize, int coeff_shift, + int row, int col) { + assert(bsize == BLOCK_4X4 || bsize == BLOCK_4X8 || bsize == BLOCK_8X4 || + bsize == BLOCK_8X8); + uint64_t sum = 0; + int bi, bx, by; + uint16_t *dst16 = CONVERT_TO_SHORTPTR((uint8_t *)dst); + uint16_t *dst_buff = &dst16[row * dstride + col]; + int src_stride, width, height, width_log2, height_log2; + init_src_params(&src_stride, &width, &height, &width_log2, &height_log2, + bsize); + for (bi = 0; bi < cdef_count; bi++) { + by = dlist[bi].by; + bx = dlist[bi].bx; + sum += aom_mse_wxh_16bit_highbd( + &dst_buff[(by << height_log2) * dstride + (bx << width_log2)], dstride, + &src[bi << (height_log2 + width_log2)], src_stride, width, height); + } + return sum >> 2 * coeff_shift; +} +#endif + +// Checks dual and quad block processing is applicable for block widths 8 and 4 +// respectively. 
+static INLINE int is_dual_or_quad_applicable(cdef_list *dlist, int width, + int cdef_count, int bi, int iter) { + assert(width == 8 || width == 4); + const int blk_offset = (width == 8) ? 1 : 3; + if ((iter + blk_offset) >= cdef_count) return 0; + + if (dlist[bi].by == dlist[bi + blk_offset].by && + dlist[bi].bx + blk_offset == dlist[bi + blk_offset].bx) + return 1; + + return 0; +} + +static uint64_t compute_cdef_dist(void *dst, int dstride, uint16_t *src, + cdef_list *dlist, int cdef_count, + BLOCK_SIZE bsize, int coeff_shift, int row, + int col) { + assert(bsize == BLOCK_4X4 || bsize == BLOCK_4X8 || bsize == BLOCK_8X4 || + bsize == BLOCK_8X8); + uint64_t sum = 0; + int bi, bx, by; + int iter = 0; + int inc = 1; + uint8_t *dst8 = (uint8_t *)dst; + uint8_t *dst_buff = &dst8[row * dstride + col]; + int src_stride, width, height, width_log2, height_log2; + init_src_params(&src_stride, &width, &height, &width_log2, &height_log2, + bsize); + + const int num_blks = 16 / width; + for (bi = 0; bi < cdef_count; bi += inc) { + by = dlist[bi].by; + bx = dlist[bi].bx; + uint16_t *src_tmp = &src[bi << (height_log2 + width_log2)]; + uint8_t *dst_tmp = + &dst_buff[(by << height_log2) * dstride + (bx << width_log2)]; + + if (is_dual_or_quad_applicable(dlist, width, cdef_count, bi, iter)) { + sum += aom_mse_16xh_16bit(dst_tmp, dstride, src_tmp, width, height); + iter += num_blks; + inc = num_blks; + } else { + sum += aom_mse_wxh_16bit(dst_tmp, dstride, src_tmp, src_stride, width, + height); + iter += 1; + inc = 1; + } + } + + return sum >> 2 * coeff_shift; +} + +// Fill the boundary regions of the block with CDEF_VERY_LARGE, only if the +// region is outside frame boundary +static INLINE void fill_borders_for_fbs_on_frame_boundary( + uint16_t *inbuf, int hfilt_size, int vfilt_size, + bool is_fb_on_frm_left_boundary, bool is_fb_on_frm_right_boundary, + bool is_fb_on_frm_top_boundary, bool is_fb_on_frm_bottom_boundary) { + if (!is_fb_on_frm_left_boundary && 
!is_fb_on_frm_right_boundary && + !is_fb_on_frm_top_boundary && !is_fb_on_frm_bottom_boundary) + return; + if (is_fb_on_frm_bottom_boundary) { + // Fill bottom region of the block + const int buf_offset = + (vfilt_size + CDEF_VBORDER) * CDEF_BSTRIDE + CDEF_HBORDER; + fill_rect(&inbuf[buf_offset], CDEF_BSTRIDE, CDEF_VBORDER, hfilt_size, + CDEF_VERY_LARGE); + } + if (is_fb_on_frm_bottom_boundary || is_fb_on_frm_left_boundary) { + const int buf_offset = (vfilt_size + CDEF_VBORDER) * CDEF_BSTRIDE; + // Fill bottom-left region of the block + fill_rect(&inbuf[buf_offset], CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER, + CDEF_VERY_LARGE); + } + if (is_fb_on_frm_bottom_boundary || is_fb_on_frm_right_boundary) { + const int buf_offset = + (vfilt_size + CDEF_VBORDER) * CDEF_BSTRIDE + hfilt_size + CDEF_HBORDER; + // Fill bottom-right region of the block + fill_rect(&inbuf[buf_offset], CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER, + CDEF_VERY_LARGE); + } + if (is_fb_on_frm_top_boundary) { + // Fill top region of the block + fill_rect(&inbuf[CDEF_HBORDER], CDEF_BSTRIDE, CDEF_VBORDER, hfilt_size, + CDEF_VERY_LARGE); + } + if (is_fb_on_frm_top_boundary || is_fb_on_frm_left_boundary) { + // Fill top-left region of the block + fill_rect(inbuf, CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER, CDEF_VERY_LARGE); + } + if (is_fb_on_frm_top_boundary || is_fb_on_frm_right_boundary) { + const int buf_offset = hfilt_size + CDEF_HBORDER; + // Fill top-right region of the block + fill_rect(&inbuf[buf_offset], CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER, + CDEF_VERY_LARGE); + } + if (is_fb_on_frm_left_boundary) { + const int buf_offset = CDEF_VBORDER * CDEF_BSTRIDE; + // Fill left region of the block + fill_rect(&inbuf[buf_offset], CDEF_BSTRIDE, vfilt_size, CDEF_HBORDER, + CDEF_VERY_LARGE); + } + if (is_fb_on_frm_right_boundary) { + const int buf_offset = CDEF_VBORDER * CDEF_BSTRIDE; + // Fill right region of the block + fill_rect(&inbuf[buf_offset + hfilt_size + CDEF_HBORDER], CDEF_BSTRIDE, + vfilt_size, 
CDEF_HBORDER, CDEF_VERY_LARGE); + } +} + +// Calculate the number of 8x8/4x4 filter units for which SSE can be calculated +// after CDEF filtering in single function call +static AOM_FORCE_INLINE int get_error_calc_width_in_filt_units( + cdef_list *dlist, int cdef_count, int bi, int subsampling_x, + int subsampling_y) { + // TODO(Ranjit): Extend the optimization for 422 + if (subsampling_x != subsampling_y) return 1; + + // Combining more blocks seems to increase encode time due to increase in + // control code + if (bi + 3 < cdef_count && dlist[bi].by == dlist[bi + 3].by && + dlist[bi].bx + 3 == dlist[bi + 3].bx) { + /* Calculate error for four 8x8/4x4 blocks using 32x8/16x4 block specific + * logic if y co-ordinates match and x co-ordinates are + * separated by 3 for first and fourth 8x8/4x4 blocks in dlist[]. */ + return 4; + } + if (bi + 1 < cdef_count && dlist[bi].by == dlist[bi + 1].by && + dlist[bi].bx + 1 == dlist[bi + 1].bx) { + /* Calculate error for two 8x8/4x4 blocks using 16x8/8x4 block specific + * logic if their y co-ordinates match and x co-ordinates are + * separated by 1 for first and second 8x8/4x4 blocks in dlist[]. 
*/ + return 2; + } + return 1; +} + +// Returns the block error after CDEF filtering for a given strength +static INLINE uint64_t get_filt_error( + const CdefSearchCtx *cdef_search_ctx, const struct macroblockd_plane *pd, + cdef_list *dlist, int dir[CDEF_NBLOCKS][CDEF_NBLOCKS], int *dirinit, + int var[CDEF_NBLOCKS][CDEF_NBLOCKS], uint16_t *in, uint8_t *ref_buffer, + int ref_stride, int row, int col, int pri_strength, int sec_strength, + int cdef_count, int pli, int coeff_shift, BLOCK_SIZE bs) { + uint64_t curr_sse = 0; + const BLOCK_SIZE plane_bsize = + get_plane_block_size(bs, pd->subsampling_x, pd->subsampling_y); + const int bw_log2 = 3 - pd->subsampling_x; + const int bh_log2 = 3 - pd->subsampling_y; + + // TODO(Ranjit): Extend this optimization for HBD + if (!cdef_search_ctx->use_highbitdepth) { + // If all 8x8/4x4 blocks in CDEF block need to be filtered, calculate the + // error at CDEF block level + const int tot_blk_count = + (block_size_wide[plane_bsize] * block_size_high[plane_bsize]) >> + (bw_log2 + bh_log2); + if (cdef_count == tot_blk_count) { + // Calculate the offset in the buffer based on block position + const FULLPEL_MV this_mv = { row, col }; + const int buf_offset = get_offset_from_fullmv(&this_mv, ref_stride); + if (pri_strength == 0 && sec_strength == 0) { + // When CDEF strength is zero, filtering is not applied. 
Hence + // error is calculated between source and unfiltered pixels + curr_sse = + aom_sse(&ref_buffer[buf_offset], ref_stride, + get_buf_from_fullmv(&pd->dst, &this_mv), pd->dst.stride, + block_size_wide[plane_bsize], block_size_high[plane_bsize]); + } else { + DECLARE_ALIGNED(32, uint8_t, tmp_dst8[1 << (MAX_SB_SIZE_LOG2 * 2)]); + + av1_cdef_filter_fb(tmp_dst8, NULL, (1 << MAX_SB_SIZE_LOG2), in, + cdef_search_ctx->xdec[pli], + cdef_search_ctx->ydec[pli], dir, dirinit, var, pli, + dlist, cdef_count, pri_strength, + sec_strength + (sec_strength == 3), + cdef_search_ctx->damping, coeff_shift); + curr_sse = + aom_sse(&ref_buffer[buf_offset], ref_stride, tmp_dst8, + (1 << MAX_SB_SIZE_LOG2), block_size_wide[plane_bsize], + block_size_high[plane_bsize]); + } + } else { + // If few 8x8/4x4 blocks in CDEF block need to be filtered, filtering + // functions produce 8-bit output and the error is calculated in 8-bit + // domain + if (pri_strength == 0 && sec_strength == 0) { + int num_error_calc_filt_units = 1; + for (int bi = 0; bi < cdef_count; bi = bi + num_error_calc_filt_units) { + const uint8_t by = dlist[bi].by; + const uint8_t bx = dlist[bi].bx; + const int16_t by_pos = (by << bh_log2); + const int16_t bx_pos = (bx << bw_log2); + // Calculate the offset in the buffer based on block position + const FULLPEL_MV this_mv = { row + by_pos, col + bx_pos }; + const int buf_offset = get_offset_from_fullmv(&this_mv, ref_stride); + num_error_calc_filt_units = get_error_calc_width_in_filt_units( + dlist, cdef_count, bi, pd->subsampling_x, pd->subsampling_y); + curr_sse += aom_sse( + &ref_buffer[buf_offset], ref_stride, + get_buf_from_fullmv(&pd->dst, &this_mv), pd->dst.stride, + num_error_calc_filt_units * (1 << bw_log2), (1 << bh_log2)); + } + } else { + DECLARE_ALIGNED(32, uint8_t, tmp_dst8[1 << (MAX_SB_SIZE_LOG2 * 2)]); + av1_cdef_filter_fb(tmp_dst8, NULL, (1 << MAX_SB_SIZE_LOG2), in, + cdef_search_ctx->xdec[pli], + cdef_search_ctx->ydec[pli], dir, dirinit, var, pli, + dlist, 
cdef_count, pri_strength, + sec_strength + (sec_strength == 3), + cdef_search_ctx->damping, coeff_shift); + int num_error_calc_filt_units = 1; + for (int bi = 0; bi < cdef_count; bi = bi + num_error_calc_filt_units) { + const uint8_t by = dlist[bi].by; + const uint8_t bx = dlist[bi].bx; + const int16_t by_pos = (by << bh_log2); + const int16_t bx_pos = (bx << bw_log2); + // Calculate the offset in the buffer based on block position + const FULLPEL_MV this_mv = { row + by_pos, col + bx_pos }; + const FULLPEL_MV tmp_buf_pos = { by_pos, bx_pos }; + const int buf_offset = get_offset_from_fullmv(&this_mv, ref_stride); + const int tmp_buf_offset = + get_offset_from_fullmv(&tmp_buf_pos, (1 << MAX_SB_SIZE_LOG2)); + num_error_calc_filt_units = get_error_calc_width_in_filt_units( + dlist, cdef_count, bi, pd->subsampling_x, pd->subsampling_y); + curr_sse += aom_sse( + &ref_buffer[buf_offset], ref_stride, &tmp_dst8[tmp_buf_offset], + (1 << MAX_SB_SIZE_LOG2), + num_error_calc_filt_units * (1 << bw_log2), (1 << bh_log2)); + } + } + } + } else { + DECLARE_ALIGNED(32, uint16_t, tmp_dst[1 << (MAX_SB_SIZE_LOG2 * 2)]); + + av1_cdef_filter_fb(NULL, tmp_dst, CDEF_BSTRIDE, in, + cdef_search_ctx->xdec[pli], cdef_search_ctx->ydec[pli], + dir, dirinit, var, pli, dlist, cdef_count, pri_strength, + sec_strength + (sec_strength == 3), + cdef_search_ctx->damping, coeff_shift); + curr_sse = cdef_search_ctx->compute_cdef_dist_fn( + ref_buffer, ref_stride, tmp_dst, dlist, cdef_count, + cdef_search_ctx->bsize[pli], coeff_shift, row, col); + } + return curr_sse; +} + +// Calculates MSE at block level. +// Inputs: +// cdef_search_ctx: Pointer to the structure containing parameters related to +// CDEF search context. +// fbr: Row index in units of 64x64 block +// fbc: Column index in units of 64x64 block +// Returns: +// Nothing will be returned. Contents of cdef_search_ctx will be modified. 
+void av1_cdef_mse_calc_block(CdefSearchCtx *cdef_search_ctx, + struct aom_internal_error_info *error_info, + int fbr, int fbc, int sb_count) { + // TODO(aomedia:3276): Pass error_info to the low-level functions as required + // in future to handle error propagation. + (void)error_info; + const CommonModeInfoParams *const mi_params = cdef_search_ctx->mi_params; + const YV12_BUFFER_CONFIG *ref = cdef_search_ctx->ref; + const int coeff_shift = cdef_search_ctx->coeff_shift; + const int *mi_wide_l2 = cdef_search_ctx->mi_wide_l2; + const int *mi_high_l2 = cdef_search_ctx->mi_high_l2; + + // Declare and initialize the temporary buffers. + DECLARE_ALIGNED(32, uint16_t, inbuf[CDEF_INBUF_SIZE]); + cdef_list dlist[MI_SIZE_128X128 * MI_SIZE_128X128]; + int dir[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } }; + int var[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } }; + uint16_t *const in = inbuf + CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER; + int nhb = AOMMIN(MI_SIZE_64X64, mi_params->mi_cols - MI_SIZE_64X64 * fbc); + int nvb = AOMMIN(MI_SIZE_64X64, mi_params->mi_rows - MI_SIZE_64X64 * fbr); + int hb_step = 1, vb_step = 1; + BLOCK_SIZE bs; + + const MB_MODE_INFO *const mbmi = + mi_params->mi_grid_base[MI_SIZE_64X64 * fbr * mi_params->mi_stride + + MI_SIZE_64X64 * fbc]; + + uint8_t *ref_buffer[MAX_MB_PLANE] = { ref->y_buffer, ref->u_buffer, + ref->v_buffer }; + int ref_stride[MAX_MB_PLANE] = { ref->y_stride, ref->uv_stride, + ref->uv_stride }; + + if (mbmi->bsize == BLOCK_128X128 || mbmi->bsize == BLOCK_128X64 || + mbmi->bsize == BLOCK_64X128) { + bs = mbmi->bsize; + if (bs == BLOCK_128X128 || bs == BLOCK_128X64) { + nhb = AOMMIN(MI_SIZE_128X128, mi_params->mi_cols - MI_SIZE_64X64 * fbc); + hb_step = 2; + } + if (bs == BLOCK_128X128 || bs == BLOCK_64X128) { + nvb = AOMMIN(MI_SIZE_128X128, mi_params->mi_rows - MI_SIZE_64X64 * fbr); + vb_step = 2; + } + } else { + bs = BLOCK_64X64; + } + // Get number of 8x8 blocks which are not skip. 
Cdef processing happens for + // 8x8 blocks which are not skip. + const int cdef_count = av1_cdef_compute_sb_list( + mi_params, fbr * MI_SIZE_64X64, fbc * MI_SIZE_64X64, dlist, bs); + const bool is_fb_on_frm_left_boundary = (fbc == 0); + const bool is_fb_on_frm_right_boundary = + (fbc + hb_step == cdef_search_ctx->nhfb); + const bool is_fb_on_frm_top_boundary = (fbr == 0); + const bool is_fb_on_frm_bottom_boundary = + (fbr + vb_step == cdef_search_ctx->nvfb); + const int yoff = CDEF_VBORDER * (!is_fb_on_frm_top_boundary); + const int xoff = CDEF_HBORDER * (!is_fb_on_frm_left_boundary); + int dirinit = 0; + for (int pli = 0; pli < cdef_search_ctx->num_planes; pli++) { + /* We avoid filtering the pixels for which some of the pixels to + average are outside the frame. We could change the filter instead, + but it would add special cases for any future vectorization. */ + const int hfilt_size = (nhb << mi_wide_l2[pli]); + const int vfilt_size = (nvb << mi_high_l2[pli]); + const int ysize = + vfilt_size + CDEF_VBORDER * (!is_fb_on_frm_bottom_boundary) + yoff; + const int xsize = + hfilt_size + CDEF_HBORDER * (!is_fb_on_frm_right_boundary) + xoff; + const int row = fbr * MI_SIZE_64X64 << mi_high_l2[pli]; + const int col = fbc * MI_SIZE_64X64 << mi_wide_l2[pli]; + struct macroblockd_plane pd = cdef_search_ctx->plane[pli]; + cdef_search_ctx->copy_fn(&in[(-yoff * CDEF_BSTRIDE - xoff)], CDEF_BSTRIDE, + pd.dst.buf, row - yoff, col - xoff, pd.dst.stride, + ysize, xsize); + fill_borders_for_fbs_on_frame_boundary( + inbuf, hfilt_size, vfilt_size, is_fb_on_frm_left_boundary, + is_fb_on_frm_right_boundary, is_fb_on_frm_top_boundary, + is_fb_on_frm_bottom_boundary); + for (int gi = 0; gi < cdef_search_ctx->total_strengths; gi++) { + int pri_strength, sec_strength; + get_cdef_filter_strengths(cdef_search_ctx->pick_method, &pri_strength, + &sec_strength, gi); + const uint64_t curr_mse = get_filt_error( + cdef_search_ctx, &pd, dlist, dir, &dirinit, var, in, ref_buffer[pli], + 
ref_stride[pli], row, col, pri_strength, sec_strength, cdef_count, + pli, coeff_shift, bs); + if (pli < 2) + cdef_search_ctx->mse[pli][sb_count][gi] = curr_mse; + else + cdef_search_ctx->mse[1][sb_count][gi] += curr_mse; + } + } + cdef_search_ctx->sb_index[sb_count] = + MI_SIZE_64X64 * fbr * mi_params->mi_stride + MI_SIZE_64X64 * fbc; +} + +// MSE calculation at frame level. +// Inputs: +// cdef_search_ctx: Pointer to the structure containing parameters related to +// CDEF search context. +// Returns: +// Nothing will be returned. Contents of cdef_search_ctx will be modified. +static void cdef_mse_calc_frame(CdefSearchCtx *cdef_search_ctx, + struct aom_internal_error_info *error_info) { + // Loop over each sb. + for (int fbr = 0; fbr < cdef_search_ctx->nvfb; ++fbr) { + for (int fbc = 0; fbc < cdef_search_ctx->nhfb; ++fbc) { + // Checks if cdef processing can be skipped for particular sb. + if (cdef_sb_skip(cdef_search_ctx->mi_params, fbr, fbc)) continue; + // Calculate mse for each sb and store the relevant sb index. + av1_cdef_mse_calc_block(cdef_search_ctx, error_info, fbr, fbc, + cdef_search_ctx->sb_count); + cdef_search_ctx->sb_count++; + } + } +} + +// Allocates memory for members of CdefSearchCtx. +// Inputs: +// cdef_search_ctx: Pointer to the structure containing parameters +// related to CDEF search context. +// Returns: +// Nothing will be returned. Contents of cdef_search_ctx will be modified. 
+static void cdef_alloc_data(AV1_COMMON *cm, CdefSearchCtx *cdef_search_ctx) { + const int nvfb = cdef_search_ctx->nvfb; + const int nhfb = cdef_search_ctx->nhfb; + CHECK_MEM_ERROR( + cm, cdef_search_ctx->sb_index, + aom_malloc(nvfb * nhfb * sizeof(cdef_search_ctx->sb_index[0]))); + cdef_search_ctx->sb_count = 0; + CHECK_MEM_ERROR(cm, cdef_search_ctx->mse[0], + aom_malloc(sizeof(**cdef_search_ctx->mse) * nvfb * nhfb)); + CHECK_MEM_ERROR(cm, cdef_search_ctx->mse[1], + aom_malloc(sizeof(**cdef_search_ctx->mse) * nvfb * nhfb)); +} + +// Deallocates the memory allocated for members of CdefSearchCtx. +// Inputs: +// cdef_search_ctx: Pointer to the structure containing parameters +// related to CDEF search context. +// Returns: +// Nothing will be returned. +void av1_cdef_dealloc_data(CdefSearchCtx *cdef_search_ctx) { + if (cdef_search_ctx) { + aom_free(cdef_search_ctx->mse[0]); + cdef_search_ctx->mse[0] = NULL; + aom_free(cdef_search_ctx->mse[1]); + cdef_search_ctx->mse[1] = NULL; + aom_free(cdef_search_ctx->sb_index); + cdef_search_ctx->sb_index = NULL; + } +} + +// Initialize the parameters related to CDEF search context. +// Inputs: +// frame: Pointer to compressed frame buffer +// ref: Pointer to the frame buffer holding the source frame +// cm: Pointer to top level common structure +// xd: Pointer to common current coding block structure +// cdef_search_ctx: Pointer to the structure containing parameters related to +// CDEF search context. +// pick_method: Search method used to select CDEF parameters +// Returns: +// Nothing will be returned. Contents of cdef_search_ctx will be modified. 
+static AOM_INLINE void cdef_params_init(const YV12_BUFFER_CONFIG *frame, + const YV12_BUFFER_CONFIG *ref, + AV1_COMMON *cm, MACROBLOCKD *xd, + CdefSearchCtx *cdef_search_ctx, + CDEF_PICK_METHOD pick_method) { + const CommonModeInfoParams *const mi_params = &cm->mi_params; + const int num_planes = av1_num_planes(cm); + cdef_search_ctx->mi_params = &cm->mi_params; + cdef_search_ctx->ref = ref; + cdef_search_ctx->nvfb = + (mi_params->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64; + cdef_search_ctx->nhfb = + (mi_params->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64; + cdef_search_ctx->coeff_shift = AOMMAX(cm->seq_params->bit_depth - 8, 0); + cdef_search_ctx->damping = 3 + (cm->quant_params.base_qindex >> 6); + cdef_search_ctx->total_strengths = nb_cdef_strengths[pick_method]; + cdef_search_ctx->num_planes = num_planes; + cdef_search_ctx->pick_method = pick_method; + cdef_search_ctx->sb_count = 0; + cdef_search_ctx->use_highbitdepth = cm->seq_params->use_highbitdepth; + av1_setup_dst_planes(xd->plane, cm->seq_params->sb_size, frame, 0, 0, 0, + num_planes); + // Initialize plane wise information. + for (int pli = 0; pli < num_planes; pli++) { + cdef_search_ctx->xdec[pli] = xd->plane[pli].subsampling_x; + cdef_search_ctx->ydec[pli] = xd->plane[pli].subsampling_y; + cdef_search_ctx->bsize[pli] = + cdef_search_ctx->ydec[pli] + ? (cdef_search_ctx->xdec[pli] ? BLOCK_4X4 : BLOCK_8X4) + : (cdef_search_ctx->xdec[pli] ? BLOCK_4X8 : BLOCK_8X8); + cdef_search_ctx->mi_wide_l2[pli] = + MI_SIZE_LOG2 - xd->plane[pli].subsampling_x; + cdef_search_ctx->mi_high_l2[pli] = + MI_SIZE_LOG2 - xd->plane[pli].subsampling_y; + cdef_search_ctx->plane[pli] = xd->plane[pli]; + } + // Function pointer initialization. 
+#if CONFIG_AV1_HIGHBITDEPTH + if (cm->seq_params->use_highbitdepth) { + cdef_search_ctx->copy_fn = av1_cdef_copy_sb8_16_highbd; + cdef_search_ctx->compute_cdef_dist_fn = compute_cdef_dist_highbd; + } else { + cdef_search_ctx->copy_fn = av1_cdef_copy_sb8_16_lowbd; + cdef_search_ctx->compute_cdef_dist_fn = compute_cdef_dist; + } +#else + cdef_search_ctx->copy_fn = av1_cdef_copy_sb8_16_lowbd; + cdef_search_ctx->compute_cdef_dist_fn = compute_cdef_dist; +#endif +} + +void av1_pick_cdef_from_qp(AV1_COMMON *const cm, int skip_cdef, + int is_screen_content) { + const int bd = cm->seq_params->bit_depth; + const int q = + av1_ac_quant_QTX(cm->quant_params.base_qindex, 0, bd) >> (bd - 8); + CdefInfo *const cdef_info = &cm->cdef_info; + // Check the speed feature to avoid extra signaling. + if (skip_cdef) { + cdef_info->cdef_bits = 1; + cdef_info->nb_cdef_strengths = 2; + } else { + cdef_info->cdef_bits = 0; + cdef_info->nb_cdef_strengths = 1; + } + cdef_info->cdef_damping = 3 + (cm->quant_params.base_qindex >> 6); + + int predicted_y_f1 = 0; + int predicted_y_f2 = 0; + int predicted_uv_f1 = 0; + int predicted_uv_f2 = 0; + if (is_screen_content) { + predicted_y_f1 = + (int)(5.88217781e-06 * q * q + 6.10391455e-03 * q + 9.95043102e-02); + predicted_y_f2 = + (int)(-7.79934857e-06 * q * q + 6.58957830e-03 * q + 8.81045025e-01); + predicted_uv_f1 = + (int)(-6.79500136e-06 * q * q + 1.02695586e-02 * q + 1.36126802e-01); + predicted_uv_f2 = + (int)(-9.99613695e-08 * q * q - 1.79361339e-05 * q + 1.17022324e+0); + predicted_y_f1 = clamp(predicted_y_f1, 0, 15); + predicted_y_f2 = clamp(predicted_y_f2, 0, 3); + predicted_uv_f1 = clamp(predicted_uv_f1, 0, 15); + predicted_uv_f2 = clamp(predicted_uv_f2, 0, 3); + } else { + if (!frame_is_intra_only(cm)) { + predicted_y_f1 = clamp((int)roundf(q * q * -0.0000023593946f + + q * 0.0068615186f + 0.02709886f), + 0, 15); + predicted_y_f2 = clamp((int)roundf(q * q * -0.00000057629734f + + q * 0.0013993345f + 0.03831067f), + 0, 3); + 
predicted_uv_f1 = clamp((int)roundf(q * q * -0.0000007095069f + + q * 0.0034628846f + 0.00887099f), + 0, 15); + predicted_uv_f2 = clamp((int)roundf(q * q * 0.00000023874085f + + q * 0.00028223585f + 0.05576307f), + 0, 3); + } else { + predicted_y_f1 = clamp( + (int)roundf(q * q * 0.0000033731974f + q * 0.008070594f + 0.0187634f), + 0, 15); + predicted_y_f2 = clamp((int)roundf(q * q * 0.0000029167343f + + q * 0.0027798624f + 0.0079405f), + 0, 3); + predicted_uv_f1 = clamp((int)roundf(q * q * -0.0000130790995f + + q * 0.012892405f - 0.00748388f), + 0, 15); + predicted_uv_f2 = clamp((int)roundf(q * q * 0.0000032651783f + + q * 0.00035520183f + 0.00228092f), + 0, 3); + } + } + cdef_info->cdef_strengths[0] = + predicted_y_f1 * CDEF_SEC_STRENGTHS + predicted_y_f2; + cdef_info->cdef_uv_strengths[0] = + predicted_uv_f1 * CDEF_SEC_STRENGTHS + predicted_uv_f2; + + // mbmi->cdef_strength is already set in the encoding stage. We don't need to + // set it again here. + if (skip_cdef) { + cdef_info->cdef_strengths[1] = 0; + cdef_info->cdef_uv_strengths[1] = 0; + return; + } + + const CommonModeInfoParams *const mi_params = &cm->mi_params; + const int nvfb = (mi_params->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64; + const int nhfb = (mi_params->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64; + MB_MODE_INFO **mbmi = mi_params->mi_grid_base; + // mbmi is NULL when real-time rate control library is used. 
  if (!mbmi) return;
  for (int r = 0; r < nvfb; ++r) {
    for (int c = 0; c < nhfb; ++c) {
      // Reset the strength index of the top-left mode info of each 64x64
      // filter block to 0 (the single strength slot written above).
      MB_MODE_INFO *current_mbmi = mbmi[MI_SIZE_64X64 * c];
      current_mbmi->cdef_strength = 0;
    }
    mbmi += MI_SIZE_64X64 * mi_params->mi_stride;
  }
}

// Top-level CDEF parameter selection for one frame.
//
// Fast exits, in order:
//   1. CDEF_REFERENCE on a non-reference frame: disable CDEF outright.
//   2. External rate control in use: fall back to av1_pick_cdef_from_qp.
//   3. CDEF_PICK_FROM_Q speed feature: pick strengths from the quantizer,
//      optionally with the screen-content model.
// Otherwise runs the full search: compute per-superblock MSE for every
// candidate strength (multithreaded when workers are available), then choose
// the number of signaling bits (0..3) and the strength sets that minimize
// RDCOST, and finally assign the best strength index to each superblock.
void av1_cdef_search(AV1_COMP *cpi) {
  AV1_COMMON *cm = &cpi->common;
  CDEF_CONTROL cdef_control = cpi->oxcf.tool_cfg.cdef_control;

  assert(cdef_control != CDEF_NONE);
  if (cdef_control == CDEF_REFERENCE && cpi->ppi->rtc_ref.non_reference_frame) {
    // Frame is not used as a reference: signal a single all-zero strength,
    // which disables filtering for this frame.
    CdefInfo *const cdef_info = &cm->cdef_info;
    cdef_info->nb_cdef_strengths = 1;
    cdef_info->cdef_bits = 0;
    cdef_info->cdef_strengths[0] = 0;
    cdef_info->cdef_uv_strengths[0] = 0;
    return;
  }

  // Indicate if external RC is used for testing
  const int rtc_ext_rc = cpi->rc.rtc_external_ratectrl;
  if (rtc_ext_rc) {
    av1_pick_cdef_from_qp(cm, 0, 0);
    return;
  }
  CDEF_PICK_METHOD pick_method = cpi->sf.lpf_sf.cdef_pick_method;
  if (pick_method == CDEF_PICK_FROM_Q) {
    // Screen-content model is only used at sufficiently high qindex and when
    // the encoder is tuned for screen content.
    const int use_screen_content_model =
        cm->quant_params.base_qindex >
            AOMMAX(cpi->sf.rt_sf.screen_content_cdef_filter_qindex_thresh,
                   cpi->rc.best_quality + 5) &&
        cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN;
    av1_pick_cdef_from_qp(cm, cpi->sf.rt_sf.skip_cdef_sb,
                          use_screen_content_model);
    return;
  }
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  const int damping = 3 + (cm->quant_params.base_qindex >> 6);
  const int fast = (pick_method >= CDEF_FAST_SEARCH_LVL1 &&
                    pick_method <= CDEF_FAST_SEARCH_LVL5);
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *xd = &cpi->td.mb.e_mbd;

  // Lazily allocate the search context; it persists on cpi across frames.
  if (!cpi->cdef_search_ctx)
    CHECK_MEM_ERROR(cm, cpi->cdef_search_ctx,
                    aom_malloc(sizeof(*cpi->cdef_search_ctx)));
  CdefSearchCtx *cdef_search_ctx = cpi->cdef_search_ctx;

  // Initialize parameters related to CDEF search context.
  cdef_params_init(&cm->cur_frame->buf, cpi->source, cm, xd, cdef_search_ctx,
                   pick_method);
  // Allocate CDEF search context buffers.
  cdef_alloc_data(cm, cdef_search_ctx);
  // Frame level mse calculation.
  if (cpi->mt_info.num_workers > 1) {
    av1_cdef_mse_calc_frame_mt(cpi);
  } else {
    cdef_mse_calc_frame(cdef_search_ctx, cm->error);
  }

  /* Search for different number of signaling bits. */
  int nb_strength_bits = 0;
  uint64_t best_rd = UINT64_MAX;
  CdefInfo *const cdef_info = &cm->cdef_info;
  int sb_count = cdef_search_ctx->sb_count;
  // mse[0]: luma, mse[1]: chroma — per-superblock MSE per candidate strength.
  uint64_t(*mse[2])[TOTAL_STRENGTHS];
  mse[0] = cdef_search_ctx->mse[0];
  mse[1] = cdef_search_ctx->mse[1];
  /* Calculate the maximum number of bits required to signal CDEF strengths at
   * block level */
  const int total_strengths = nb_cdef_strengths[pick_method];
  const int joint_strengths =
      num_planes > 1 ? total_strengths * total_strengths : total_strengths;
  const int max_signaling_bits =
      joint_strengths == 1 ? 0 : get_msb(joint_strengths - 1) + 1;
  int rdmult = cpi->td.mb.rdmult;
  // Try 1, 2, 4, 8 strengths (i signaling bits each) and keep the best RD.
  for (int i = 0; i <= 3; i++) {
    if (i > max_signaling_bits) break;
    int best_lev0[CDEF_MAX_STRENGTHS];
    int best_lev1[CDEF_MAX_STRENGTHS] = { 0 };
    const int nb_strengths = 1 << i;
    uint64_t tot_mse;
    if (num_planes > 1) {
      tot_mse = joint_strength_search_dual(best_lev0, best_lev1, nb_strengths,
                                           mse, sb_count, pick_method);
    } else {
      tot_mse = joint_strength_search(best_lev0, nb_strengths, mse[0], sb_count,
                                      pick_method);
    }

    // Rate: i bits per superblock plus the per-frame strength table.
    const int total_bits = sb_count * i + nb_strengths * CDEF_STRENGTH_BITS *
                                              (num_planes > 1 ? 2 : 1);
    const int rate_cost = av1_cost_literal(total_bits);
    const uint64_t dist = tot_mse * 16;
    const uint64_t rd = RDCOST(rdmult, rate_cost, dist);
    if (rd < best_rd) {
      best_rd = rd;
      nb_strength_bits = i;
      memcpy(cdef_info->cdef_strengths, best_lev0,
             nb_strengths * sizeof(best_lev0[0]));
      if (num_planes > 1) {
        memcpy(cdef_info->cdef_uv_strengths, best_lev1,
               nb_strengths * sizeof(best_lev1[0]));
      }
    }
  }

  cdef_info->cdef_bits = nb_strength_bits;
  cdef_info->nb_cdef_strengths = 1 << nb_strength_bits;
  // Per superblock: pick the strength-table entry with the lowest combined
  // luma+chroma MSE and record its index in the mode info.
  for (int i = 0; i < sb_count; i++) {
    uint64_t best_mse = UINT64_MAX;
    int best_gi = 0;
    for (int gi = 0; gi < cdef_info->nb_cdef_strengths; gi++) {
      uint64_t curr = mse[0][i][cdef_info->cdef_strengths[gi]];
      if (num_planes > 1) curr += mse[1][i][cdef_info->cdef_uv_strengths[gi]];
      if (curr < best_mse) {
        best_gi = gi;
        best_mse = curr;
      }
    }
    mi_params->mi_grid_base[cdef_search_ctx->sb_index[i]]->cdef_strength =
        best_gi;
  }
  if (fast) {
    // Fast search picks strengths from a reduced set; remap the reduced
    // indices back to full-range signaled strengths.
    // NOTE(review): pri_strength/sec_strength look unused here, but the
    // STORE_CDEF_FILTER_STRENGTH macro (defined elsewhere) presumably writes
    // them — confirm against pickcdef.h before removing.
    for (int j = 0; j < cdef_info->nb_cdef_strengths; j++) {
      const int luma_strength = cdef_info->cdef_strengths[j];
      const int chroma_strength = cdef_info->cdef_uv_strengths[j];
      int pri_strength, sec_strength;

      STORE_CDEF_FILTER_STRENGTH(cdef_info->cdef_strengths[j], pick_method,
                                 luma_strength);
      STORE_CDEF_FILTER_STRENGTH(cdef_info->cdef_uv_strengths[j], pick_method,
                                 chroma_strength);
    }
  }

  cdef_info->cdef_damping = damping;
  // Deallocate CDEF search context buffers.
  av1_cdef_dealloc_data(cdef_search_ctx);
}