Diffstat (limited to 'third_party/aom/av1/encoder/var_based_part.c')
-rw-r--r-- | third_party/aom/av1/encoder/var_based_part.c | 1914
1 file changed, 1914 insertions, 0 deletions
diff --git a/third_party/aom/av1/encoder/var_based_part.c b/third_party/aom/av1/encoder/var_based_part.c new file mode 100644 index 0000000000..f664795153 --- /dev/null +++ b/third_party/aom/av1/encoder/var_based_part.c @@ -0,0 +1,1914 @@ +/* + * Copyright (c) 2019, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include <limits.h> +#include <math.h> +#include <stdbool.h> +#include <stdio.h> + +#include "config/aom_config.h" +#include "config/aom_dsp_rtcd.h" +#include "config/av1_rtcd.h" + +#include "aom_dsp/aom_dsp_common.h" +#include "aom_dsp/binary_codes_writer.h" +#include "aom_ports/mem.h" +#include "aom_ports/aom_timer.h" + +#include "av1/common/reconinter.h" +#include "av1/common/blockd.h" + +#include "av1/encoder/encodeframe.h" +#include "av1/encoder/var_based_part.h" +#include "av1/encoder/reconinter_enc.h" +#include "av1/encoder/rdopt_utils.h" + +// Possible values for the force_split variable while evaluating variance based +// partitioning. +enum { + // Evaluate all partition types + PART_EVAL_ALL = 0, + // Force PARTITION_SPLIT + PART_EVAL_ONLY_SPLIT = 1, + // Force PARTITION_NONE + PART_EVAL_ONLY_NONE = 2 +} UENUM1BYTE(PART_EVAL_STATUS); + +typedef struct { + VPVariance *part_variances; + VPartVar *split[4]; +} variance_node; + +static AOM_INLINE void tree_to_node(void *data, BLOCK_SIZE bsize, + variance_node *node) { + node->part_variances = NULL; + switch (bsize) { + case BLOCK_128X128: { + VP128x128 *vt = (VP128x128 *)data; + node->part_variances = &vt->part_variances; + for (int split_idx = 0; split_idx < 4; split_idx++) + node->split[split_idx] = &vt->split[split_idx].part_variances.none; + break; + } + case BLOCK_64X64: { + VP64x64 *vt = (VP64x64 *)data; + node->part_variances = &vt->part_variances; + for (int split_idx = 0; split_idx < 4; split_idx++) + node->split[split_idx] = &vt->split[split_idx].part_variances.none; + break; + } + case BLOCK_32X32: { + VP32x32 *vt = (VP32x32 *)data; + node->part_variances = &vt->part_variances; + for (int split_idx = 0; split_idx < 4; split_idx++) + node->split[split_idx] = &vt->split[split_idx].part_variances.none; + break; + } + case BLOCK_16X16: { + VP16x16 *vt = (VP16x16 *)data; + node->part_variances = &vt->part_variances; + for (int split_idx = 0; split_idx < 4; split_idx++) + node->split[split_idx] = &vt->split[split_idx].part_variances.none; + break; + } + case BLOCK_8X8: { + VP8x8 *vt = (VP8x8 *)data; + node->part_variances = &vt->part_variances; + for (int split_idx = 0; split_idx < 4; split_idx++) + node->split[split_idx] = &vt->split[split_idx].part_variances.none; + break; + } + default: { + VP4x4 *vt = (VP4x4 *)data; + assert(bsize == BLOCK_4X4); + node->part_variances = &vt->part_variances; + for (int split_idx = 0; split_idx < 4; split_idx++) + node->split[split_idx] = &vt->split[split_idx]; + break; + } + } +} + +// Set variance values given sum square error, sum error, count. 
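/*
 * For reference: get_variance() below evaluates the identity
 *   var = E[x^2] - (E[x])^2
 * in fixed point as
 *   variance = 256 * (SSE - SUM^2 / N) / N,  with N = 2^log2_count,
 * i.e. the per-sample variance of the residual scaled by 256 to keep
 * precision in integer arithmetic. An equivalent floating-point sketch
 * (illustrative only, not used by the encoder):
 *
 *   static double variance_ref(double sse, double sum, int log2_count) {
 *     const double n = (double)(1 << log2_count);
 *     return 256.0 * (sse - (sum * sum) / n) / n;
 *   }
 *
 * Worked example with N = 16 (log2_count = 4), SUM = 32, SSE = 100:
 * SUM^2 >> 4 = 64, so variance = (256 * (100 - 64)) >> 4 = 576,
 * which matches 256 * (100/16 - 2^2).
 */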
+static AOM_INLINE void fill_variance(uint32_t s2, int32_t s, int c, + VPartVar *v) { + v->sum_square_error = s2; + v->sum_error = s; + v->log2_count = c; +} + +static AOM_INLINE void get_variance(VPartVar *v) { + v->variance = + (int)(256 * (v->sum_square_error - + (uint32_t)(((int64_t)v->sum_error * v->sum_error) >> + v->log2_count)) >> + v->log2_count); +} + +static AOM_INLINE void sum_2_variances(const VPartVar *a, const VPartVar *b, + VPartVar *r) { + assert(a->log2_count == b->log2_count); + fill_variance(a->sum_square_error + b->sum_square_error, + a->sum_error + b->sum_error, a->log2_count + 1, r); +} + +static AOM_INLINE void fill_variance_tree(void *data, BLOCK_SIZE bsize) { + variance_node node; + memset(&node, 0, sizeof(node)); + tree_to_node(data, bsize, &node); + sum_2_variances(node.split[0], node.split[1], &node.part_variances->horz[0]); + sum_2_variances(node.split[2], node.split[3], &node.part_variances->horz[1]); + sum_2_variances(node.split[0], node.split[2], &node.part_variances->vert[0]); + sum_2_variances(node.split[1], node.split[3], &node.part_variances->vert[1]); + sum_2_variances(&node.part_variances->vert[0], &node.part_variances->vert[1], + &node.part_variances->none); +} + +static AOM_INLINE void set_block_size(AV1_COMP *const cpi, int mi_row, + int mi_col, BLOCK_SIZE bsize) { + if (cpi->common.mi_params.mi_cols > mi_col && + cpi->common.mi_params.mi_rows > mi_row) { + CommonModeInfoParams *mi_params = &cpi->common.mi_params; + const int mi_grid_idx = get_mi_grid_idx(mi_params, mi_row, mi_col); + const int mi_alloc_idx = get_alloc_mi_idx(mi_params, mi_row, mi_col); + MB_MODE_INFO *mi = mi_params->mi_grid_base[mi_grid_idx] = + &mi_params->mi_alloc[mi_alloc_idx]; + mi->bsize = bsize; + } +} + +static int set_vt_partitioning(AV1_COMP *cpi, MACROBLOCKD *const xd, + const TileInfo *const tile, void *data, + BLOCK_SIZE bsize, int mi_row, int mi_col, + int64_t threshold, BLOCK_SIZE bsize_min, + PART_EVAL_STATUS force_split) { + AV1_COMMON *const cm = &cpi->common; + variance_node vt; + const int block_width = mi_size_wide[bsize]; + const int block_height = mi_size_high[bsize]; + int bs_width_check = block_width; + int bs_height_check = block_height; + int bs_width_vert_check = block_width >> 1; + int bs_height_horiz_check = block_height >> 1; + // On the right and bottom boundary we only need to check + // if half the bsize fits, because boundary is extended + // up to 64. So do this check only for sb_size = 64X64. + if (cm->seq_params->sb_size == BLOCK_64X64) { + if (tile->mi_col_end == cm->mi_params.mi_cols) { + bs_width_check = (block_width >> 1) + 1; + bs_width_vert_check = (block_width >> 2) + 1; + } + if (tile->mi_row_end == cm->mi_params.mi_rows) { + bs_height_check = (block_height >> 1) + 1; + bs_height_horiz_check = (block_height >> 2) + 1; + } + } + + assert(block_height == block_width); + tree_to_node(data, bsize, &vt); + + if (mi_col + bs_width_check <= tile->mi_col_end && + mi_row + bs_height_check <= tile->mi_row_end && + force_split == PART_EVAL_ONLY_NONE) { + set_block_size(cpi, mi_row, mi_col, bsize); + return 1; + } + if (force_split == PART_EVAL_ONLY_SPLIT) return 0; + + // For bsize=bsize_min (16x16/8x8 for 8x8/4x4 downsampling), select if + // variance is below threshold, otherwise split will be selected. + // No check for vert/horiz split as too few samples for variance. + if (bsize == bsize_min) { + // Variance already computed to set the force_split. 
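  /* Return contract: 1 means a partition decision was committed at this
   * node via set_block_size(); 0 tells the caller to recurse into the
   * four sub-blocks instead. */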
+ if (frame_is_intra_only(cm)) get_variance(&vt.part_variances->none); + if (mi_col + bs_width_check <= tile->mi_col_end && + mi_row + bs_height_check <= tile->mi_row_end && + vt.part_variances->none.variance < threshold) { + set_block_size(cpi, mi_row, mi_col, bsize); + return 1; + } + return 0; + } else if (bsize > bsize_min) { + // Variance already computed to set the force_split. + if (frame_is_intra_only(cm)) get_variance(&vt.part_variances->none); + // For key frame: take split for bsize above 32X32 or very high variance. + if (frame_is_intra_only(cm) && + (bsize > BLOCK_32X32 || + vt.part_variances->none.variance > (threshold << 4))) { + return 0; + } + // If variance is low, take the bsize (no split). + if (mi_col + bs_width_check <= tile->mi_col_end && + mi_row + bs_height_check <= tile->mi_row_end && + vt.part_variances->none.variance < threshold) { + set_block_size(cpi, mi_row, mi_col, bsize); + return 1; + } + // Check vertical split. + if (mi_row + bs_height_check <= tile->mi_row_end && + mi_col + bs_width_vert_check <= tile->mi_col_end) { + BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_VERT); + BLOCK_SIZE plane_bsize = + get_plane_block_size(subsize, xd->plane[AOM_PLANE_U].subsampling_x, + xd->plane[AOM_PLANE_U].subsampling_y); + get_variance(&vt.part_variances->vert[0]); + get_variance(&vt.part_variances->vert[1]); + if (vt.part_variances->vert[0].variance < threshold && + vt.part_variances->vert[1].variance < threshold && + plane_bsize < BLOCK_INVALID) { + set_block_size(cpi, mi_row, mi_col, subsize); + set_block_size(cpi, mi_row, mi_col + block_width / 2, subsize); + return 1; + } + } + // Check horizontal split. + if (mi_col + bs_width_check <= tile->mi_col_end && + mi_row + bs_height_horiz_check <= tile->mi_row_end) { + BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_HORZ); + BLOCK_SIZE plane_bsize = + get_plane_block_size(subsize, xd->plane[AOM_PLANE_U].subsampling_x, + xd->plane[AOM_PLANE_U].subsampling_y); + get_variance(&vt.part_variances->horz[0]); + get_variance(&vt.part_variances->horz[1]); + if (vt.part_variances->horz[0].variance < threshold && + vt.part_variances->horz[1].variance < threshold && + plane_bsize < BLOCK_INVALID) { + set_block_size(cpi, mi_row, mi_col, subsize); + set_block_size(cpi, mi_row + block_height / 2, mi_col, subsize); + return 1; + } + } + return 0; + } + return 0; +} + +static AOM_INLINE int all_blks_inside(int x16_idx, int y16_idx, int pixels_wide, + int pixels_high) { + int all_inside = 1; + for (int idx = 0; idx < 4; idx++) { + all_inside &= ((x16_idx + GET_BLK_IDX_X(idx, 3)) < pixels_wide); + all_inside &= ((y16_idx + GET_BLK_IDX_Y(idx, 3)) < pixels_high); + } + return all_inside; +} + +#if CONFIG_AV1_HIGHBITDEPTH +// TODO(yunqingwang): Perform average of four 8x8 blocks similar to lowbd +static AOM_INLINE void fill_variance_8x8avg_highbd( + const uint8_t *src_buf, int src_stride, const uint8_t *dst_buf, + int dst_stride, int x16_idx, int y16_idx, VP16x16 *vst, int pixels_wide, + int pixels_high) { + for (int idx = 0; idx < 4; idx++) { + const int x8_idx = x16_idx + GET_BLK_IDX_X(idx, 3); + const int y8_idx = y16_idx + GET_BLK_IDX_Y(idx, 3); + unsigned int sse = 0; + int sum = 0; + if (x8_idx < pixels_wide && y8_idx < pixels_high) { + int src_avg = aom_highbd_avg_8x8(src_buf + y8_idx * src_stride + x8_idx, + src_stride); + int dst_avg = aom_highbd_avg_8x8(dst_buf + y8_idx * dst_stride + x8_idx, + dst_stride); + + sum = src_avg - dst_avg; + sse = sum * sum; + } + fill_variance(sse, sum, 0, 
&vst->split[idx].part_variances.none); + } +} +#endif + +static AOM_INLINE void fill_variance_8x8avg_lowbd( + const uint8_t *src_buf, int src_stride, const uint8_t *dst_buf, + int dst_stride, int x16_idx, int y16_idx, VP16x16 *vst, int pixels_wide, + int pixels_high) { + unsigned int sse[4] = { 0 }; + int sum[4] = { 0 }; + + if (all_blks_inside(x16_idx, y16_idx, pixels_wide, pixels_high)) { + int src_avg[4]; + int dst_avg[4]; + aom_avg_8x8_quad(src_buf, src_stride, x16_idx, y16_idx, src_avg); + aom_avg_8x8_quad(dst_buf, dst_stride, x16_idx, y16_idx, dst_avg); + for (int idx = 0; idx < 4; idx++) { + sum[idx] = src_avg[idx] - dst_avg[idx]; + sse[idx] = sum[idx] * sum[idx]; + } + } else { + for (int idx = 0; idx < 4; idx++) { + const int x8_idx = x16_idx + GET_BLK_IDX_X(idx, 3); + const int y8_idx = y16_idx + GET_BLK_IDX_Y(idx, 3); + if (x8_idx < pixels_wide && y8_idx < pixels_high) { + int src_avg = + aom_avg_8x8(src_buf + y8_idx * src_stride + x8_idx, src_stride); + int dst_avg = + aom_avg_8x8(dst_buf + y8_idx * dst_stride + x8_idx, dst_stride); + sum[idx] = src_avg - dst_avg; + sse[idx] = sum[idx] * sum[idx]; + } + } + } + + for (int idx = 0; idx < 4; idx++) { + fill_variance(sse[idx], sum[idx], 0, &vst->split[idx].part_variances.none); + } +} + +// Obtain parameters required to calculate variance (such as sum, sse, etc,.) +// at 8x8 sub-block level for a given 16x16 block. +// The function can be called only when is_key_frame is false since sum is +// computed between source and reference frames. +static AOM_INLINE void fill_variance_8x8avg( + const uint8_t *src_buf, int src_stride, const uint8_t *dst_buf, + int dst_stride, int x16_idx, int y16_idx, VP16x16 *vst, int highbd_flag, + int pixels_wide, int pixels_high) { +#if CONFIG_AV1_HIGHBITDEPTH + if (highbd_flag) { + fill_variance_8x8avg_highbd(src_buf, src_stride, dst_buf, dst_stride, + x16_idx, y16_idx, vst, pixels_wide, + pixels_high); + return; + } +#else + (void)highbd_flag; +#endif // CONFIG_AV1_HIGHBITDEPTH + fill_variance_8x8avg_lowbd(src_buf, src_stride, dst_buf, dst_stride, x16_idx, + y16_idx, vst, pixels_wide, pixels_high); +} + +static int compute_minmax_8x8(const uint8_t *src_buf, int src_stride, + const uint8_t *dst_buf, int dst_stride, + int x16_idx, int y16_idx, +#if CONFIG_AV1_HIGHBITDEPTH + int highbd_flag, +#endif + int pixels_wide, int pixels_high) { + int minmax_max = 0; + int minmax_min = 255; + // Loop over the 4 8x8 subblocks. + for (int idx = 0; idx < 4; idx++) { + const int x8_idx = x16_idx + GET_BLK_IDX_X(idx, 3); + const int y8_idx = y16_idx + GET_BLK_IDX_Y(idx, 3); + int min = 0; + int max = 0; + if (x8_idx < pixels_wide && y8_idx < pixels_high) { +#if CONFIG_AV1_HIGHBITDEPTH + if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) { + aom_highbd_minmax_8x8( + src_buf + y8_idx * src_stride + x8_idx, src_stride, + dst_buf + y8_idx * dst_stride + x8_idx, dst_stride, &min, &max); + } else { + aom_minmax_8x8(src_buf + y8_idx * src_stride + x8_idx, src_stride, + dst_buf + y8_idx * dst_stride + x8_idx, dst_stride, &min, + &max); + } +#else + aom_minmax_8x8(src_buf + y8_idx * src_stride + x8_idx, src_stride, + dst_buf + y8_idx * dst_stride + x8_idx, dst_stride, &min, + &max); +#endif + if ((max - min) > minmax_max) minmax_max = (max - min); + if ((max - min) < minmax_min) minmax_min = (max - min); + } + } + return (minmax_max - minmax_min); +} + +// Function to compute average and variance of 4x4 sub-block. +// The function can be called only when is_key_frame is true since sum is +// computed using source frame only. 
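/* With no reference available on a key frame, the predictor is a flat
 * mid-gray block: dst_avg is fixed at 128 (the 8-bit midpoint), so
 * sum = src_avg - 128 and the resulting tree variances measure spatial
 * activity of the source rather than temporal change. */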
+static AOM_INLINE void fill_variance_4x4avg(const uint8_t *src_buf, + int src_stride, int x8_idx, + int y8_idx, VP8x8 *vst, +#if CONFIG_AV1_HIGHBITDEPTH + int highbd_flag, +#endif + int pixels_wide, int pixels_high, + int border_offset_4x4) { + for (int idx = 0; idx < 4; idx++) { + const int x4_idx = x8_idx + GET_BLK_IDX_X(idx, 2); + const int y4_idx = y8_idx + GET_BLK_IDX_Y(idx, 2); + unsigned int sse = 0; + int sum = 0; + if (x4_idx < pixels_wide - border_offset_4x4 && + y4_idx < pixels_high - border_offset_4x4) { + int src_avg; + int dst_avg = 128; +#if CONFIG_AV1_HIGHBITDEPTH + if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) { + src_avg = aom_highbd_avg_4x4(src_buf + y4_idx * src_stride + x4_idx, + src_stride); + } else { + src_avg = + aom_avg_4x4(src_buf + y4_idx * src_stride + x4_idx, src_stride); + } +#else + src_avg = aom_avg_4x4(src_buf + y4_idx * src_stride + x4_idx, src_stride); +#endif + + sum = src_avg - dst_avg; + sse = sum * sum; + } + fill_variance(sse, sum, 0, &vst->split[idx].part_variances.none); + } +} + +// TODO(kyslov) Bring back threshold adjustment based on content state +static int64_t scale_part_thresh_content(int64_t threshold_base, int speed, + int width, int height, + int non_reference_frame) { + (void)width; + (void)height; + int64_t threshold = threshold_base; + if (non_reference_frame) threshold = (3 * threshold) >> 1; + if (speed >= 8) { + return (5 * threshold) >> 2; + } + return threshold; +} + +// Tune thresholds less or more aggressively to prefer larger partitions +static AOM_INLINE void tune_thresh_based_on_qindex( + AV1_COMP *cpi, int64_t thresholds[], uint64_t block_sad, int current_qindex, + int num_pixels, bool is_segment_id_boosted, int source_sad_nonrd, + int lighting_change) { + double weight; + if (cpi->sf.rt_sf.prefer_large_partition_blocks >= 3) { + const int win = 20; + if (current_qindex < QINDEX_LARGE_BLOCK_THR - win) + weight = 1.0; + else if (current_qindex > QINDEX_LARGE_BLOCK_THR + win) + weight = 0.0; + else + weight = + 1.0 - (current_qindex - QINDEX_LARGE_BLOCK_THR + win) / (2 * win); + if (num_pixels > RESOLUTION_480P) { + for (int i = 0; i < 4; i++) { + thresholds[i] <<= 1; + } + } + if (num_pixels <= RESOLUTION_288P) { + thresholds[3] = INT64_MAX; + if (is_segment_id_boosted == false) { + thresholds[1] <<= 2; + thresholds[2] <<= (source_sad_nonrd <= kLowSad) ? 5 : 4; + } else { + thresholds[1] <<= 1; + thresholds[2] <<= 3; + } + // Allow for split to 8x8 for superblocks where part of it has + // moving boundary. So allow for sb with source_sad above threshold, + // and avoid very large source_sad or high source content, to avoid + // too many 8x8 within superblock. + uint64_t avg_source_sad_thresh = 25000; + uint64_t block_sad_low = 25000; + uint64_t block_sad_high = 50000; + if (cpi->svc.temporal_layer_id == 0 && + cpi->svc.number_temporal_layers > 1) { + // Increase the sad thresholds for base TL0, as reference/LAST is + // 2/4 frames behind (for 2/3 #TL). + avg_source_sad_thresh = 40000; + block_sad_high = 70000; + } + if (is_segment_id_boosted == false && + cpi->rc.avg_source_sad < avg_source_sad_thresh && + block_sad > block_sad_low && block_sad < block_sad_high && + !lighting_change) { + thresholds[2] = (3 * thresholds[2]) >> 2; + thresholds[3] = thresholds[2] << 3; + } + // Condition the increase of partition thresholds on the segment + // and the content. 
Avoid the increase for superblocks which have + // high source sad, unless the whole frame has very high motion + // (i.e, cpi->rc.avg_source_sad is very large, in which case all blocks + // have high source sad). + } else if (num_pixels > RESOLUTION_480P && is_segment_id_boosted == false && + (source_sad_nonrd != kHighSad || + cpi->rc.avg_source_sad > 50000)) { + thresholds[0] = (3 * thresholds[0]) >> 1; + thresholds[3] = INT64_MAX; + if (current_qindex > QINDEX_LARGE_BLOCK_THR) { + thresholds[1] = + (int)((1 - weight) * (thresholds[1] << 1) + weight * thresholds[1]); + thresholds[2] = + (int)((1 - weight) * (thresholds[2] << 1) + weight * thresholds[2]); + } + } else if (current_qindex > QINDEX_LARGE_BLOCK_THR && + is_segment_id_boosted == false && + (source_sad_nonrd != kHighSad || + cpi->rc.avg_source_sad > 50000)) { + thresholds[1] = + (int)((1 - weight) * (thresholds[1] << 2) + weight * thresholds[1]); + thresholds[2] = + (int)((1 - weight) * (thresholds[2] << 4) + weight * thresholds[2]); + thresholds[3] = INT64_MAX; + } + } else if (cpi->sf.rt_sf.prefer_large_partition_blocks >= 2) { + thresholds[1] <<= (source_sad_nonrd <= kLowSad) ? 2 : 0; + thresholds[2] = + (source_sad_nonrd <= kLowSad) ? (3 * thresholds[2]) : thresholds[2]; + } else if (cpi->sf.rt_sf.prefer_large_partition_blocks >= 1) { + const int fac = (source_sad_nonrd <= kLowSad) ? 2 : 1; + if (current_qindex < QINDEX_LARGE_BLOCK_THR - 45) + weight = 1.0; + else if (current_qindex > QINDEX_LARGE_BLOCK_THR + 45) + weight = 0.0; + else + weight = 1.0 - (current_qindex - QINDEX_LARGE_BLOCK_THR + 45) / (2 * 45); + thresholds[1] = + (int)((1 - weight) * (thresholds[1] << 1) + weight * thresholds[1]); + thresholds[2] = + (int)((1 - weight) * (thresholds[2] << 1) + weight * thresholds[2]); + thresholds[3] = + (int)((1 - weight) * (thresholds[3] << fac) + weight * thresholds[3]); + } + if (cpi->sf.part_sf.disable_8x8_part_based_on_qidx && (current_qindex < 128)) + thresholds[3] = INT64_MAX; +} + +static void set_vbp_thresholds_key_frame(AV1_COMP *cpi, int64_t thresholds[], + int64_t threshold_base, + int threshold_left_shift, + int num_pixels) { + if (cpi->sf.rt_sf.force_large_partition_blocks_intra) { + const int shift_steps = + threshold_left_shift - (cpi->oxcf.mode == ALLINTRA ? 7 : 8); + assert(shift_steps >= 0); + threshold_base <<= shift_steps; + } + thresholds[0] = threshold_base; + thresholds[1] = threshold_base; + if (num_pixels < RESOLUTION_720P) { + thresholds[2] = threshold_base / 3; + thresholds[3] = threshold_base >> 1; + } else { + int shift_val = 2; + if (cpi->sf.rt_sf.force_large_partition_blocks_intra) { + shift_val = 0; + } + + thresholds[2] = threshold_base >> shift_val; + thresholds[3] = threshold_base >> shift_val; + } + thresholds[4] = threshold_base << 2; +} + +static AOM_INLINE void tune_thresh_based_on_resolution( + AV1_COMP *cpi, int64_t thresholds[], int64_t threshold_base, + int current_qindex, int source_sad_rd, int num_pixels) { + if (num_pixels >= RESOLUTION_720P) thresholds[3] = thresholds[3] << 1; + if (num_pixels <= RESOLUTION_288P) { + const int qindex_thr[5][2] = { + { 200, 220 }, { 140, 170 }, { 120, 150 }, { 200, 210 }, { 170, 220 }, + }; + int th_idx = 0; + if (cpi->sf.rt_sf.var_part_based_on_qidx >= 1) + th_idx = + (source_sad_rd <= kLowSad) ? 
cpi->sf.rt_sf.var_part_based_on_qidx : 0; + if (cpi->sf.rt_sf.var_part_based_on_qidx >= 3) + th_idx = cpi->sf.rt_sf.var_part_based_on_qidx; + const int qindex_low_thr = qindex_thr[th_idx][0]; + const int qindex_high_thr = qindex_thr[th_idx][1]; + if (current_qindex >= qindex_high_thr) { + threshold_base = (5 * threshold_base) >> 1; + thresholds[1] = threshold_base >> 3; + thresholds[2] = threshold_base << 2; + thresholds[3] = threshold_base << 5; + } else if (current_qindex < qindex_low_thr) { + thresholds[1] = threshold_base >> 3; + thresholds[2] = threshold_base >> 1; + thresholds[3] = threshold_base << 3; + } else { + int64_t qi_diff_low = current_qindex - qindex_low_thr; + int64_t qi_diff_high = qindex_high_thr - current_qindex; + int64_t threshold_diff = qindex_high_thr - qindex_low_thr; + int64_t threshold_base_high = (5 * threshold_base) >> 1; + + threshold_diff = threshold_diff > 0 ? threshold_diff : 1; + threshold_base = + (qi_diff_low * threshold_base_high + qi_diff_high * threshold_base) / + threshold_diff; + thresholds[1] = threshold_base >> 3; + thresholds[2] = ((qi_diff_low * threshold_base) + + qi_diff_high * (threshold_base >> 1)) / + threshold_diff; + thresholds[3] = ((qi_diff_low * (threshold_base << 5)) + + qi_diff_high * (threshold_base << 3)) / + threshold_diff; + } + } else if (num_pixels < RESOLUTION_720P) { + thresholds[2] = (5 * threshold_base) >> 2; + } else if (num_pixels < RESOLUTION_1080P) { + thresholds[2] = threshold_base << 1; + } else { + // num_pixels >= RESOLUTION_1080P + if (cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN) { + if (num_pixels < RESOLUTION_1440P) { + thresholds[2] = (5 * threshold_base) >> 1; + } else { + thresholds[2] = (7 * threshold_base) >> 1; + } + } else { + if (cpi->oxcf.speed > 7) { + thresholds[2] = 6 * threshold_base; + } else { + thresholds[2] = 3 * threshold_base; + } + } + } +} + +// Increase partition thresholds for noisy content. Apply it only for +// superblocks where sumdiff is low, as we assume the sumdiff of superblock +// whose only change is due to noise will be low (i.e, noise will average +// out over large block). +static AOM_INLINE int64_t tune_thresh_noisy_content(AV1_COMP *cpi, + int64_t threshold_base, + int content_lowsumdiff, + int num_pixels) { + AV1_COMMON *const cm = &cpi->common; + int64_t updated_thresh_base = threshold_base; + if (cpi->noise_estimate.enabled && content_lowsumdiff && + num_pixels > RESOLUTION_480P && cm->current_frame.frame_number > 60) { + NOISE_LEVEL noise_level = + av1_noise_estimate_extract_level(&cpi->noise_estimate); + if (noise_level == kHigh) + updated_thresh_base = (5 * updated_thresh_base) >> 1; + else if (noise_level == kMedium && + !cpi->sf.rt_sf.prefer_large_partition_blocks) + updated_thresh_base = (5 * updated_thresh_base) >> 2; + } + // TODO(kyslov) Enable var based partition adjusment on temporal denoising +#if 0 // CONFIG_AV1_TEMPORAL_DENOISING + if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi) && + cpi->oxcf.speed > 5 && cpi->denoiser.denoising_level >= kDenLow) + updated_thresh_base = + av1_scale_part_thresh(updated_thresh_base, cpi->denoiser.denoising_level, + content_state, cpi->svc.temporal_layer_id); + else + threshold_base = + scale_part_thresh_content(updated_thresh_base, cpi->oxcf.speed, cm->width, + cm->height, cpi->ppi->rtc_ref.non_reference_frame); +#else + // Increase base variance threshold based on content_state/sum_diff level. 
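  /* scale_part_thresh_content() (defined above) raises the threshold by
   * 3/2 for non-reference frames and by a further 5/4 at speed >= 8,
   * biasing such frames toward larger partitions. */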
+ updated_thresh_base = scale_part_thresh_content( + updated_thresh_base, cpi->oxcf.speed, cm->width, cm->height, + cpi->ppi->rtc_ref.non_reference_frame); +#endif + return updated_thresh_base; +} + +static AOM_INLINE void set_vbp_thresholds( + AV1_COMP *cpi, int64_t thresholds[], uint64_t blk_sad, int qindex, + int content_lowsumdiff, int source_sad_nonrd, int source_sad_rd, + bool is_segment_id_boosted, int lighting_change) { + AV1_COMMON *const cm = &cpi->common; + const int is_key_frame = frame_is_intra_only(cm); + const int threshold_multiplier = is_key_frame ? 120 : 1; + const int ac_q = av1_ac_quant_QTX(qindex, 0, cm->seq_params->bit_depth); + int64_t threshold_base = (int64_t)(threshold_multiplier * ac_q); + const int current_qindex = cm->quant_params.base_qindex; + const int threshold_left_shift = cpi->sf.rt_sf.var_part_split_threshold_shift; + const int num_pixels = cm->width * cm->height; + + if (is_key_frame) { + set_vbp_thresholds_key_frame(cpi, thresholds, threshold_base, + threshold_left_shift, num_pixels); + return; + } + + threshold_base = tune_thresh_noisy_content(cpi, threshold_base, + content_lowsumdiff, num_pixels); + thresholds[0] = threshold_base >> 1; + thresholds[1] = threshold_base; + thresholds[3] = threshold_base << threshold_left_shift; + + tune_thresh_based_on_resolution(cpi, thresholds, threshold_base, + current_qindex, source_sad_rd, num_pixels); + + tune_thresh_based_on_qindex(cpi, thresholds, blk_sad, current_qindex, + num_pixels, is_segment_id_boosted, + source_sad_nonrd, lighting_change); +} + +// Set temporal variance low flag for superblock 64x64. +// Only first 25 in the array are used in this case. +static AOM_INLINE void set_low_temp_var_flag_64x64( + CommonModeInfoParams *mi_params, PartitionSearchInfo *part_info, + MACROBLOCKD *xd, VP64x64 *vt, const int64_t thresholds[], int mi_col, + int mi_row) { + if (xd->mi[0]->bsize == BLOCK_64X64) { + if ((vt->part_variances).none.variance < (thresholds[0] >> 1)) + part_info->variance_low[0] = 1; + } else if (xd->mi[0]->bsize == BLOCK_64X32) { + for (int part_idx = 0; part_idx < 2; part_idx++) { + if (vt->part_variances.horz[part_idx].variance < (thresholds[0] >> 2)) + part_info->variance_low[part_idx + 1] = 1; + } + } else if (xd->mi[0]->bsize == BLOCK_32X64) { + for (int part_idx = 0; part_idx < 2; part_idx++) { + if (vt->part_variances.vert[part_idx].variance < (thresholds[0] >> 2)) + part_info->variance_low[part_idx + 3] = 1; + } + } else { + static const int idx[4][2] = { { 0, 0 }, { 0, 8 }, { 8, 0 }, { 8, 8 } }; + for (int lvl1_idx = 0; lvl1_idx < 4; lvl1_idx++) { + const int idx_str = mi_params->mi_stride * (mi_row + idx[lvl1_idx][0]) + + mi_col + idx[lvl1_idx][1]; + MB_MODE_INFO **this_mi = mi_params->mi_grid_base + idx_str; + + if (mi_params->mi_cols <= mi_col + idx[lvl1_idx][1] || + mi_params->mi_rows <= mi_row + idx[lvl1_idx][0]) + continue; + + if (*this_mi == NULL) continue; + + if ((*this_mi)->bsize == BLOCK_32X32) { + int64_t threshold_32x32 = (5 * thresholds[1]) >> 3; + if (vt->split[lvl1_idx].part_variances.none.variance < threshold_32x32) + part_info->variance_low[lvl1_idx + 5] = 1; + } else { + // For 32x16 and 16x32 blocks, the flag is set on each 16x16 block + // inside. 
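        /* variance_low[] layout for a 64x64 superblock (first 25 entries):
         * [0] 64x64, [1-2] 64x32 halves, [3-4] 32x64 halves,
         * [5-8] the four 32x32s, [9-24] the sixteen 16x16s. */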
+ if ((*this_mi)->bsize == BLOCK_16X16 || + (*this_mi)->bsize == BLOCK_32X16 || + (*this_mi)->bsize == BLOCK_16X32) { + for (int lvl2_idx = 0; lvl2_idx < 4; lvl2_idx++) { + if (vt->split[lvl1_idx] + .split[lvl2_idx] + .part_variances.none.variance < (thresholds[2] >> 8)) + part_info->variance_low[(lvl1_idx << 2) + lvl2_idx + 9] = 1; + } + } + } + } + } +} + +static AOM_INLINE void set_low_temp_var_flag_128x128( + CommonModeInfoParams *mi_params, PartitionSearchInfo *part_info, + MACROBLOCKD *xd, VP128x128 *vt, const int64_t thresholds[], int mi_col, + int mi_row) { + if (xd->mi[0]->bsize == BLOCK_128X128) { + if (vt->part_variances.none.variance < (thresholds[0] >> 1)) + part_info->variance_low[0] = 1; + } else if (xd->mi[0]->bsize == BLOCK_128X64) { + for (int part_idx = 0; part_idx < 2; part_idx++) { + if (vt->part_variances.horz[part_idx].variance < (thresholds[0] >> 2)) + part_info->variance_low[part_idx + 1] = 1; + } + } else if (xd->mi[0]->bsize == BLOCK_64X128) { + for (int part_idx = 0; part_idx < 2; part_idx++) { + if (vt->part_variances.vert[part_idx].variance < (thresholds[0] >> 2)) + part_info->variance_low[part_idx + 3] = 1; + } + } else { + static const int idx64[4][2] = { + { 0, 0 }, { 0, 16 }, { 16, 0 }, { 16, 16 } + }; + static const int idx32[4][2] = { { 0, 0 }, { 0, 8 }, { 8, 0 }, { 8, 8 } }; + for (int lvl1_idx = 0; lvl1_idx < 4; lvl1_idx++) { + const int idx_str = mi_params->mi_stride * (mi_row + idx64[lvl1_idx][0]) + + mi_col + idx64[lvl1_idx][1]; + MB_MODE_INFO **mi_64 = mi_params->mi_grid_base + idx_str; + if (*mi_64 == NULL) continue; + if (mi_params->mi_cols <= mi_col + idx64[lvl1_idx][1] || + mi_params->mi_rows <= mi_row + idx64[lvl1_idx][0]) + continue; + const int64_t threshold_64x64 = (5 * thresholds[1]) >> 3; + if ((*mi_64)->bsize == BLOCK_64X64) { + if (vt->split[lvl1_idx].part_variances.none.variance < threshold_64x64) + part_info->variance_low[5 + lvl1_idx] = 1; + } else if ((*mi_64)->bsize == BLOCK_64X32) { + for (int part_idx = 0; part_idx < 2; part_idx++) + if (vt->split[lvl1_idx].part_variances.horz[part_idx].variance < + (threshold_64x64 >> 1)) + part_info->variance_low[9 + (lvl1_idx << 1) + part_idx] = 1; + } else if ((*mi_64)->bsize == BLOCK_32X64) { + for (int part_idx = 0; part_idx < 2; part_idx++) + if (vt->split[lvl1_idx].part_variances.vert[part_idx].variance < + (threshold_64x64 >> 1)) + part_info->variance_low[17 + (lvl1_idx << 1) + part_idx] = 1; + } else { + for (int lvl2_idx = 0; lvl2_idx < 4; lvl2_idx++) { + const int idx_str1 = + mi_params->mi_stride * idx32[lvl2_idx][0] + idx32[lvl2_idx][1]; + MB_MODE_INFO **mi_32 = mi_params->mi_grid_base + idx_str + idx_str1; + if (*mi_32 == NULL) continue; + + if (mi_params->mi_cols <= + mi_col + idx64[lvl1_idx][1] + idx32[lvl2_idx][1] || + mi_params->mi_rows <= + mi_row + idx64[lvl1_idx][0] + idx32[lvl2_idx][0]) + continue; + const int64_t threshold_32x32 = (5 * thresholds[2]) >> 3; + if ((*mi_32)->bsize == BLOCK_32X32) { + if (vt->split[lvl1_idx] + .split[lvl2_idx] + .part_variances.none.variance < threshold_32x32) + part_info->variance_low[25 + (lvl1_idx << 2) + lvl2_idx] = 1; + } else { + // For 32x16 and 16x32 blocks, the flag is set on each 16x16 block + // inside. 
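          /* variance_low[] layout for a 128x128 superblock:
           * [0] 128x128, [1-2] 128x64, [3-4] 64x128, [5-8] 64x64,
           * [9-16] 64x32 halves, [17-24] 32x64 halves, [25-40] 32x32,
           * [41-104] 16x16. */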
+ if ((*mi_32)->bsize == BLOCK_16X16 || + (*mi_32)->bsize == BLOCK_32X16 || + (*mi_32)->bsize == BLOCK_16X32) { + for (int lvl3_idx = 0; lvl3_idx < 4; lvl3_idx++) { + VPartVar *none_var = &vt->split[lvl1_idx] + .split[lvl2_idx] + .split[lvl3_idx] + .part_variances.none; + if (none_var->variance < (thresholds[3] >> 8)) + part_info->variance_low[41 + (lvl1_idx << 4) + + (lvl2_idx << 2) + lvl3_idx] = 1; + } + } + } + } + } + } + } +} + +static AOM_INLINE void set_low_temp_var_flag( + AV1_COMP *cpi, PartitionSearchInfo *part_info, MACROBLOCKD *xd, + VP128x128 *vt, int64_t thresholds[], MV_REFERENCE_FRAME ref_frame_partition, + int mi_col, int mi_row, const bool is_small_sb) { + AV1_COMMON *const cm = &cpi->common; + // Check temporal variance for bsize >= 16x16, if LAST_FRAME was selected. + // If the temporal variance is small set the flag + // variance_low for the block. The variance threshold can be adjusted, the + // higher the more aggressive. + if (ref_frame_partition == LAST_FRAME) { + if (is_small_sb) + set_low_temp_var_flag_64x64(&cm->mi_params, part_info, xd, + &(vt->split[0]), thresholds, mi_col, mi_row); + else + set_low_temp_var_flag_128x128(&cm->mi_params, part_info, xd, vt, + thresholds, mi_col, mi_row); + } +} + +static const int pos_shift_16x16[4][4] = { + { 9, 10, 13, 14 }, { 11, 12, 15, 16 }, { 17, 18, 21, 22 }, { 19, 20, 23, 24 } +}; + +int av1_get_force_skip_low_temp_var_small_sb(const uint8_t *variance_low, + int mi_row, int mi_col, + BLOCK_SIZE bsize) { + // Relative indices of MB inside the superblock. + const int mi_x = mi_row & 0xF; + const int mi_y = mi_col & 0xF; + // Relative indices of 16x16 block inside the superblock. + const int i = mi_x >> 2; + const int j = mi_y >> 2; + int force_skip_low_temp_var = 0; + // Set force_skip_low_temp_var based on the block size and block offset. + switch (bsize) { + case BLOCK_64X64: force_skip_low_temp_var = variance_low[0]; break; + case BLOCK_64X32: + if (!mi_y && !mi_x) { + force_skip_low_temp_var = variance_low[1]; + } else if (!mi_y && mi_x) { + force_skip_low_temp_var = variance_low[2]; + } + break; + case BLOCK_32X64: + if (!mi_y && !mi_x) { + force_skip_low_temp_var = variance_low[3]; + } else if (mi_y && !mi_x) { + force_skip_low_temp_var = variance_low[4]; + } + break; + case BLOCK_32X32: + if (!mi_y && !mi_x) { + force_skip_low_temp_var = variance_low[5]; + } else if (mi_y && !mi_x) { + force_skip_low_temp_var = variance_low[6]; + } else if (!mi_y && mi_x) { + force_skip_low_temp_var = variance_low[7]; + } else if (mi_y && mi_x) { + force_skip_low_temp_var = variance_low[8]; + } + break; + case BLOCK_32X16: + case BLOCK_16X32: + case BLOCK_16X16: + force_skip_low_temp_var = variance_low[pos_shift_16x16[i][j]]; + break; + default: break; + } + + return force_skip_low_temp_var; +} + +int av1_get_force_skip_low_temp_var(const uint8_t *variance_low, int mi_row, + int mi_col, BLOCK_SIZE bsize) { + int force_skip_low_temp_var = 0; + int x, y; + x = (mi_col & 0x1F) >> 4; + // y = (mi_row & 0x1F) >> 4; + // const int idx64 = (y << 1) + x; + y = (mi_row & 0x17) >> 3; + const int idx64 = y + x; + + x = (mi_col & 0xF) >> 3; + // y = (mi_row & 0xF) >> 3; + // const int idx32 = (y << 1) + x; + y = (mi_row & 0xB) >> 2; + const int idx32 = y + x; + + x = (mi_col & 0x7) >> 2; + // y = (mi_row & 0x7) >> 2; + // const int idx16 = (y << 1) + x; + y = (mi_row & 0x5) >> 1; + const int idx16 = y + x; + // Set force_skip_low_temp_var based on the block size and block offset. 
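  /* The unusual masks fold the "<< 1" of the commented-out forms into
   * the row index: mi_row is 16/8/4-aligned at each level, so e.g.
   * (mi_row & 0x17) >> 3 == ((mi_row & 0x1F) >> 4) << 1, and idx64 can
   * be computed as y + x instead of (y << 1) + x. */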
+ switch (bsize) { + case BLOCK_128X128: force_skip_low_temp_var = variance_low[0]; break; + case BLOCK_128X64: + assert((mi_col & 0x1F) == 0); + force_skip_low_temp_var = variance_low[1 + ((mi_row & 0x1F) != 0)]; + break; + case BLOCK_64X128: + assert((mi_row & 0x1F) == 0); + force_skip_low_temp_var = variance_low[3 + ((mi_col & 0x1F) != 0)]; + break; + case BLOCK_64X64: + // Location of this 64x64 block inside the 128x128 superblock + force_skip_low_temp_var = variance_low[5 + idx64]; + break; + case BLOCK_64X32: + x = (mi_col & 0x1F) >> 4; + y = (mi_row & 0x1F) >> 3; + /* + .---------------.---------------. + | x=0,y=0,idx=0 | x=0,y=0,idx=2 | + :---------------+---------------: + | x=0,y=1,idx=1 | x=1,y=1,idx=3 | + :---------------+---------------: + | x=0,y=2,idx=4 | x=1,y=2,idx=6 | + :---------------+---------------: + | x=0,y=3,idx=5 | x=1,y=3,idx=7 | + '---------------'---------------' + */ + const int idx64x32 = (x << 1) + (y % 2) + ((y >> 1) << 2); + force_skip_low_temp_var = variance_low[9 + idx64x32]; + break; + case BLOCK_32X64: + x = (mi_col & 0x1F) >> 3; + y = (mi_row & 0x1F) >> 4; + const int idx32x64 = (y << 2) + x; + force_skip_low_temp_var = variance_low[17 + idx32x64]; + break; + case BLOCK_32X32: + force_skip_low_temp_var = variance_low[25 + (idx64 << 2) + idx32]; + break; + case BLOCK_32X16: + case BLOCK_16X32: + case BLOCK_16X16: + force_skip_low_temp_var = + variance_low[41 + (idx64 << 4) + (idx32 << 2) + idx16]; + break; + default: break; + } + return force_skip_low_temp_var; +} + +void av1_set_variance_partition_thresholds(AV1_COMP *cpi, int qindex, + int content_lowsumdiff) { + SPEED_FEATURES *const sf = &cpi->sf; + if (sf->part_sf.partition_search_type != VAR_BASED_PARTITION) { + return; + } else { + set_vbp_thresholds(cpi, cpi->vbp_info.thresholds, 0, qindex, + content_lowsumdiff, 0, 0, 0, 0); + // The threshold below is not changed locally. + cpi->vbp_info.threshold_minmax = 15 + (qindex >> 3); + } +} + +static AOM_INLINE void chroma_check(AV1_COMP *cpi, MACROBLOCK *x, + BLOCK_SIZE bsize, unsigned int y_sad, + unsigned int y_sad_g, + unsigned int y_sad_alt, bool is_key_frame, + bool zero_motion, unsigned int *uv_sad) { + MACROBLOCKD *xd = &x->e_mbd; + const int source_sad_nonrd = x->content_state_sb.source_sad_nonrd; + int shift_upper_limit = 1; + int shift_lower_limit = 3; + int fac_uv = 6; + if (is_key_frame || cpi->oxcf.tool_cfg.enable_monochrome) return; + + // Use lower threshold (more conservative in setting color flag) for + // higher resolutions non-screen, which tend to have more camera noise. + // Since this may be used to skip compound mode in nonrd pickmode, which + // is generally more effective for higher resolutions, better to be more + // conservative. + if (cpi->oxcf.tune_cfg.content != AOM_CONTENT_SCREEN) { + if (cpi->common.width * cpi->common.height >= RESOLUTION_1080P) + fac_uv = 3; + else + fac_uv = 5; + } + if (cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN && + cpi->rc.high_source_sad) { + shift_lower_limit = 7; + } else if (source_sad_nonrd >= kMedSad && x->source_variance > 500 && + cpi->common.width * cpi->common.height >= 640 * 360) { + shift_upper_limit = 2; + shift_lower_limit = source_sad_nonrd > kMedSad ? 
5 : 4; + } + + MB_MODE_INFO *mi = xd->mi[0]; + const AV1_COMMON *const cm = &cpi->common; + const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, LAST_FRAME); + const YV12_BUFFER_CONFIG *yv12_g = get_ref_frame_yv12_buf(cm, GOLDEN_FRAME); + const YV12_BUFFER_CONFIG *yv12_alt = get_ref_frame_yv12_buf(cm, ALTREF_FRAME); + const struct scale_factors *const sf = + get_ref_scale_factors_const(cm, LAST_FRAME); + struct buf_2d dst; + unsigned int uv_sad_g = 0; + unsigned int uv_sad_alt = 0; + + for (int plane = AOM_PLANE_U; plane < MAX_MB_PLANE; ++plane) { + struct macroblock_plane *p = &x->plane[plane]; + struct macroblockd_plane *pd = &xd->plane[plane]; + const BLOCK_SIZE bs = + get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y); + + if (bs != BLOCK_INVALID) { + // For last: + if (zero_motion) { + if (mi->ref_frame[0] == LAST_FRAME) { + uv_sad[plane - 1] = cpi->ppi->fn_ptr[bs].sdf( + p->src.buf, p->src.stride, pd->pre[0].buf, pd->pre[0].stride); + } else { + uint8_t *src = (plane == 1) ? yv12->u_buffer : yv12->v_buffer; + setup_pred_plane(&dst, xd->mi[0]->bsize, src, yv12->uv_crop_width, + yv12->uv_crop_height, yv12->uv_stride, xd->mi_row, + xd->mi_col, sf, xd->plane[plane].subsampling_x, + xd->plane[plane].subsampling_y); + + uv_sad[plane - 1] = cpi->ppi->fn_ptr[bs].sdf( + p->src.buf, p->src.stride, dst.buf, dst.stride); + } + } else { + uv_sad[plane - 1] = cpi->ppi->fn_ptr[bs].sdf( + p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride); + } + + // For golden: + if (y_sad_g != UINT_MAX) { + uint8_t *src = (plane == 1) ? yv12_g->u_buffer : yv12_g->v_buffer; + setup_pred_plane(&dst, xd->mi[0]->bsize, src, yv12_g->uv_crop_width, + yv12_g->uv_crop_height, yv12_g->uv_stride, xd->mi_row, + xd->mi_col, sf, xd->plane[plane].subsampling_x, + xd->plane[plane].subsampling_y); + uv_sad_g = cpi->ppi->fn_ptr[bs].sdf(p->src.buf, p->src.stride, dst.buf, + dst.stride); + } + + // For altref: + if (y_sad_alt != UINT_MAX) { + uint8_t *src = (plane == 1) ? yv12_alt->u_buffer : yv12_alt->v_buffer; + setup_pred_plane(&dst, xd->mi[0]->bsize, src, yv12_alt->uv_crop_width, + yv12_alt->uv_crop_height, yv12_alt->uv_stride, + xd->mi_row, xd->mi_col, sf, + xd->plane[plane].subsampling_x, + xd->plane[plane].subsampling_y); + uv_sad_alt = cpi->ppi->fn_ptr[bs].sdf(p->src.buf, p->src.stride, + dst.buf, dst.stride); + } + } + + if (uv_sad[plane - 1] > (y_sad >> shift_upper_limit)) + x->color_sensitivity_sb[COLOR_SENS_IDX(plane)] = 1; + else if (uv_sad[plane - 1] < (y_sad >> shift_lower_limit)) + x->color_sensitivity_sb[COLOR_SENS_IDX(plane)] = 0; + // Borderline case: to be refined at coding block level in nonrd_pickmode, + // for coding block size < sb_size. + else + x->color_sensitivity_sb[COLOR_SENS_IDX(plane)] = 2; + + x->color_sensitivity_sb_g[COLOR_SENS_IDX(plane)] = + uv_sad_g > y_sad_g / fac_uv; + x->color_sensitivity_sb_alt[COLOR_SENS_IDX(plane)] = + uv_sad_alt > y_sad_alt / fac_uv; + } +} + +static void fill_variance_tree_leaves( + AV1_COMP *cpi, MACROBLOCK *x, VP128x128 *vt, PART_EVAL_STATUS *force_split, + int avg_16x16[][4], int maxvar_16x16[][4], int minvar_16x16[][4], + int64_t *thresholds, const uint8_t *src_buf, int src_stride, + const uint8_t *dst_buf, int dst_stride, bool is_key_frame, + const bool is_small_sb) { + MACROBLOCKD *xd = &x->e_mbd; + const int num_64x64_blocks = is_small_sb ? 
1 : 4; + // TODO(kyslov) Bring back compute_minmax_variance with content type detection + const int compute_minmax_variance = 0; + const int segment_id = xd->mi[0]->segment_id; + int pixels_wide = 128, pixels_high = 128; + int border_offset_4x4 = 0; + int temporal_denoising = cpi->sf.rt_sf.use_rtc_tf; + // dst_buf pointer is not used for is_key_frame, so it should be NULL. + assert(IMPLIES(is_key_frame, dst_buf == NULL)); + if (is_small_sb) { + pixels_wide = 64; + pixels_high = 64; + } + if (xd->mb_to_right_edge < 0) pixels_wide += (xd->mb_to_right_edge >> 3); + if (xd->mb_to_bottom_edge < 0) pixels_high += (xd->mb_to_bottom_edge >> 3); +#if CONFIG_AV1_TEMPORAL_DENOISING + temporal_denoising |= cpi->oxcf.noise_sensitivity; +#endif + // For temporal filtering or temporal denoiser enabled: since the source + // is modified we need to avoid 4x4 avg along superblock boundary, since + // simd code will load 8 pixels for 4x4 avg and so can access source + // data outside superblock (while its being modified by temporal filter). + // Temporal filtering is never done on key frames. + if (!is_key_frame && temporal_denoising) border_offset_4x4 = 4; + for (int blk64_idx = 0; blk64_idx < num_64x64_blocks; blk64_idx++) { + const int x64_idx = GET_BLK_IDX_X(blk64_idx, 6); + const int y64_idx = GET_BLK_IDX_Y(blk64_idx, 6); + const int blk64_scale_idx = blk64_idx << 2; + force_split[blk64_idx + 1] = PART_EVAL_ALL; + + for (int lvl1_idx = 0; lvl1_idx < 4; lvl1_idx++) { + const int x32_idx = x64_idx + GET_BLK_IDX_X(lvl1_idx, 5); + const int y32_idx = y64_idx + GET_BLK_IDX_Y(lvl1_idx, 5); + const int lvl1_scale_idx = (blk64_scale_idx + lvl1_idx) << 2; + force_split[5 + blk64_scale_idx + lvl1_idx] = PART_EVAL_ALL; + avg_16x16[blk64_idx][lvl1_idx] = 0; + maxvar_16x16[blk64_idx][lvl1_idx] = 0; + minvar_16x16[blk64_idx][lvl1_idx] = INT_MAX; + for (int lvl2_idx = 0; lvl2_idx < 4; lvl2_idx++) { + const int x16_idx = x32_idx + GET_BLK_IDX_X(lvl2_idx, 4); + const int y16_idx = y32_idx + GET_BLK_IDX_Y(lvl2_idx, 4); + const int split_index = 21 + lvl1_scale_idx + lvl2_idx; + VP16x16 *vst = &vt->split[blk64_idx].split[lvl1_idx].split[lvl2_idx]; + force_split[split_index] = PART_EVAL_ALL; + if (is_key_frame) { + // Go down to 4x4 down-sampling for variance. + for (int lvl3_idx = 0; lvl3_idx < 4; lvl3_idx++) { + const int x8_idx = x16_idx + GET_BLK_IDX_X(lvl3_idx, 3); + const int y8_idx = y16_idx + GET_BLK_IDX_Y(lvl3_idx, 3); + VP8x8 *vst2 = &vst->split[lvl3_idx]; + fill_variance_4x4avg(src_buf, src_stride, x8_idx, y8_idx, vst2, +#if CONFIG_AV1_HIGHBITDEPTH + xd->cur_buf->flags, +#endif + pixels_wide, pixels_high, border_offset_4x4); + } + } else { + fill_variance_8x8avg(src_buf, src_stride, dst_buf, dst_stride, + x16_idx, y16_idx, vst, is_cur_buf_hbd(xd), + pixels_wide, pixels_high); + + fill_variance_tree(vst, BLOCK_16X16); + VPartVar *none_var = &vt->split[blk64_idx] + .split[lvl1_idx] + .split[lvl2_idx] + .part_variances.none; + get_variance(none_var); + const int val_none_var = none_var->variance; + avg_16x16[blk64_idx][lvl1_idx] += val_none_var; + minvar_16x16[blk64_idx][lvl1_idx] = + AOMMIN(minvar_16x16[blk64_idx][lvl1_idx], val_none_var); + maxvar_16x16[blk64_idx][lvl1_idx] = + AOMMAX(maxvar_16x16[blk64_idx][lvl1_idx], val_none_var); + if (val_none_var > thresholds[3]) { + // 16X16 variance is above threshold for split, so force split to + // 8x8 for this 16x16 block (this also forces splits for upper + // levels). 
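            /* force_split[] indexing (85 entries, 128x128 SB): [0] the
             * whole SB, [1-4] the 64x64s, [5-20] the 32x32s, [21-84] the
             * 16x16s. A split forced at this level is propagated up
             * through every enclosing level by the assignments below. */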
+ force_split[split_index] = PART_EVAL_ONLY_SPLIT; + force_split[5 + blk64_scale_idx + lvl1_idx] = PART_EVAL_ONLY_SPLIT; + force_split[blk64_idx + 1] = PART_EVAL_ONLY_SPLIT; + force_split[0] = PART_EVAL_ONLY_SPLIT; + } else if (!cyclic_refresh_segment_id_boosted(segment_id) && + compute_minmax_variance && val_none_var > thresholds[2]) { + // We have some nominal amount of 16x16 variance (based on average), + // compute the minmax over the 8x8 sub-blocks, and if above + // threshold, force split to 8x8 block for this 16x16 block. + int minmax = compute_minmax_8x8(src_buf, src_stride, dst_buf, + dst_stride, x16_idx, y16_idx, +#if CONFIG_AV1_HIGHBITDEPTH + xd->cur_buf->flags, +#endif + pixels_wide, pixels_high); + const int thresh_minmax = (int)cpi->vbp_info.threshold_minmax; + if (minmax > thresh_minmax) { + force_split[split_index] = PART_EVAL_ONLY_SPLIT; + force_split[5 + blk64_scale_idx + lvl1_idx] = + PART_EVAL_ONLY_SPLIT; + force_split[blk64_idx + 1] = PART_EVAL_ONLY_SPLIT; + force_split[0] = PART_EVAL_ONLY_SPLIT; + } + } + } + } + } + } +} + +static AOM_INLINE void set_ref_frame_for_partition( + AV1_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, + MV_REFERENCE_FRAME *ref_frame_partition, MB_MODE_INFO *mi, + unsigned int *y_sad, unsigned int *y_sad_g, unsigned int *y_sad_alt, + const YV12_BUFFER_CONFIG *yv12_g, const YV12_BUFFER_CONFIG *yv12_alt, + int mi_row, int mi_col, int num_planes) { + AV1_COMMON *const cm = &cpi->common; + const bool is_set_golden_ref_frame = + *y_sad_g < 0.9 * *y_sad && *y_sad_g < *y_sad_alt; + const bool is_set_altref_ref_frame = + *y_sad_alt < 0.9 * *y_sad && *y_sad_alt < *y_sad_g; + + if (is_set_golden_ref_frame) { + av1_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col, + get_ref_scale_factors(cm, GOLDEN_FRAME), num_planes); + mi->ref_frame[0] = GOLDEN_FRAME; + mi->mv[0].as_int = 0; + *y_sad = *y_sad_g; + *ref_frame_partition = GOLDEN_FRAME; + x->nonrd_prune_ref_frame_search = 0; + x->sb_me_partition = 0; + } else if (is_set_altref_ref_frame) { + av1_setup_pre_planes(xd, 0, yv12_alt, mi_row, mi_col, + get_ref_scale_factors(cm, ALTREF_FRAME), num_planes); + mi->ref_frame[0] = ALTREF_FRAME; + mi->mv[0].as_int = 0; + *y_sad = *y_sad_alt; + *ref_frame_partition = ALTREF_FRAME; + x->nonrd_prune_ref_frame_search = 0; + x->sb_me_partition = 0; + } else { + *ref_frame_partition = LAST_FRAME; + x->nonrd_prune_ref_frame_search = + cpi->sf.rt_sf.nonrd_prune_ref_frame_search; + } +} + +static AOM_FORCE_INLINE int mv_distance(const FULLPEL_MV *mv0, + const FULLPEL_MV *mv1) { + return abs(mv0->row - mv1->row) + abs(mv0->col - mv1->col); +} + +static AOM_INLINE void evaluate_neighbour_mvs(AV1_COMP *cpi, MACROBLOCK *x, + unsigned int *y_sad, + bool is_small_sb, + int est_motion) { + const int source_sad_nonrd = x->content_state_sb.source_sad_nonrd; + // TODO(yunqingwang@google.com): test if this condition works with other + // speeds. + if (est_motion > 2 && source_sad_nonrd > kMedSad) return; + + MACROBLOCKD *xd = &x->e_mbd; + BLOCK_SIZE bsize = is_small_sb ? BLOCK_64X64 : BLOCK_128X128; + MB_MODE_INFO *mi = xd->mi[0]; + + unsigned int above_y_sad = UINT_MAX; + unsigned int left_y_sad = UINT_MAX; + FULLPEL_MV above_mv = kZeroFullMv; + FULLPEL_MV left_mv = kZeroFullMv; + SubpelMvLimits subpel_mv_limits; + const MV dummy_mv = { 0, 0 }; + av1_set_subpel_mv_search_range(&subpel_mv_limits, &x->mv_limits, &dummy_mv); + + // Current best MV + FULLPEL_MV best_mv = get_fullmv_from_mv(&mi->mv[0].as_mv); + const int multi = (est_motion > 2 && source_sad_nonrd > kLowSad) ? 
7 : 8; + + if (xd->up_available) { + const MB_MODE_INFO *above_mbmi = xd->above_mbmi; + if (above_mbmi->mode >= INTRA_MODE_END && + above_mbmi->ref_frame[0] == LAST_FRAME) { + MV temp = above_mbmi->mv[0].as_mv; + clamp_mv(&temp, &subpel_mv_limits); + above_mv = get_fullmv_from_mv(&temp); + + if (mv_distance(&best_mv, &above_mv) > 0) { + uint8_t const *ref_buf = + get_buf_from_fullmv(&xd->plane[0].pre[0], &above_mv); + above_y_sad = cpi->ppi->fn_ptr[bsize].sdf( + x->plane[0].src.buf, x->plane[0].src.stride, ref_buf, + xd->plane[0].pre[0].stride); + } + } + } + if (xd->left_available) { + const MB_MODE_INFO *left_mbmi = xd->left_mbmi; + if (left_mbmi->mode >= INTRA_MODE_END && + left_mbmi->ref_frame[0] == LAST_FRAME) { + MV temp = left_mbmi->mv[0].as_mv; + clamp_mv(&temp, &subpel_mv_limits); + left_mv = get_fullmv_from_mv(&temp); + + if (mv_distance(&best_mv, &left_mv) > 0 && + mv_distance(&above_mv, &left_mv) > 0) { + uint8_t const *ref_buf = + get_buf_from_fullmv(&xd->plane[0].pre[0], &left_mv); + left_y_sad = cpi->ppi->fn_ptr[bsize].sdf( + x->plane[0].src.buf, x->plane[0].src.stride, ref_buf, + xd->plane[0].pre[0].stride); + } + } + } + + if (above_y_sad < ((multi * *y_sad) >> 3) && above_y_sad < left_y_sad) { + *y_sad = above_y_sad; + mi->mv[0].as_mv = get_mv_from_fullmv(&above_mv); + clamp_mv(&mi->mv[0].as_mv, &subpel_mv_limits); + } + if (left_y_sad < ((multi * *y_sad) >> 3) && left_y_sad < above_y_sad) { + *y_sad = left_y_sad; + mi->mv[0].as_mv = get_mv_from_fullmv(&left_mv); + clamp_mv(&mi->mv[0].as_mv, &subpel_mv_limits); + } +} + +static void setup_planes(AV1_COMP *cpi, MACROBLOCK *x, unsigned int *y_sad, + unsigned int *y_sad_g, unsigned int *y_sad_alt, + unsigned int *y_sad_last, + MV_REFERENCE_FRAME *ref_frame_partition, + struct scale_factors *sf_no_scale, int mi_row, + int mi_col, bool is_small_sb, bool scaled_ref_last) { + AV1_COMMON *const cm = &cpi->common; + MACROBLOCKD *xd = &x->e_mbd; + const int num_planes = av1_num_planes(cm); + bool scaled_ref_golden = false; + bool scaled_ref_alt = false; + BLOCK_SIZE bsize = is_small_sb ? BLOCK_64X64 : BLOCK_128X128; + MB_MODE_INFO *mi = xd->mi[0]; + const YV12_BUFFER_CONFIG *yv12 = + scaled_ref_last ? av1_get_scaled_ref_frame(cpi, LAST_FRAME) + : get_ref_frame_yv12_buf(cm, LAST_FRAME); + assert(yv12 != NULL); + const YV12_BUFFER_CONFIG *yv12_g = NULL; + const YV12_BUFFER_CONFIG *yv12_alt = NULL; + // Check if LAST is a reference. For spatial layers always use it as + // reference scaling. + int use_last_ref = (cpi->ref_frame_flags & AOM_LAST_FLAG) || + cpi->svc.number_spatial_layers > 1; + int use_golden_ref = cpi->ref_frame_flags & AOM_GOLD_FLAG; + int use_alt_ref = cpi->ppi->rtc_ref.set_ref_frame_config || + cpi->sf.rt_sf.use_nonrd_altref_frame || + (cpi->sf.rt_sf.use_comp_ref_nonrd && + cpi->sf.rt_sf.ref_frame_comp_nonrd[2] == 1); + + // For 1 spatial layer: GOLDEN is another temporal reference. + // Check if it should be used as reference for partitioning. + if (cpi->svc.number_spatial_layers == 1 && use_golden_ref && + (x->content_state_sb.source_sad_nonrd != kZeroSad || !use_last_ref)) { + yv12_g = get_ref_frame_yv12_buf(cm, GOLDEN_FRAME); + if (yv12_g && (yv12_g->y_crop_height != cm->height || + yv12_g->y_crop_width != cm->width)) { + yv12_g = av1_get_scaled_ref_frame(cpi, GOLDEN_FRAME); + scaled_ref_golden = true; + } + if (yv12_g && yv12_g != yv12) { + av1_setup_pre_planes( + xd, 0, yv12_g, mi_row, mi_col, + scaled_ref_golden ? 
NULL : get_ref_scale_factors(cm, GOLDEN_FRAME), + num_planes); + *y_sad_g = cpi->ppi->fn_ptr[bsize].sdf( + x->plane[AOM_PLANE_Y].src.buf, x->plane[AOM_PLANE_Y].src.stride, + xd->plane[AOM_PLANE_Y].pre[0].buf, + xd->plane[AOM_PLANE_Y].pre[0].stride); + } + } + + // For 1 spatial layer: ALTREF is another temporal reference. + // Check if it should be used as reference for partitioning. + if (cpi->svc.number_spatial_layers == 1 && use_alt_ref && + (cpi->ref_frame_flags & AOM_ALT_FLAG) && + (x->content_state_sb.source_sad_nonrd != kZeroSad || !use_last_ref)) { + yv12_alt = get_ref_frame_yv12_buf(cm, ALTREF_FRAME); + if (yv12_alt && (yv12_alt->y_crop_height != cm->height || + yv12_alt->y_crop_width != cm->width)) { + yv12_alt = av1_get_scaled_ref_frame(cpi, ALTREF_FRAME); + scaled_ref_alt = true; + } + if (yv12_alt && yv12_alt != yv12) { + av1_setup_pre_planes( + xd, 0, yv12_alt, mi_row, mi_col, + scaled_ref_alt ? NULL : get_ref_scale_factors(cm, ALTREF_FRAME), + num_planes); + *y_sad_alt = cpi->ppi->fn_ptr[bsize].sdf( + x->plane[AOM_PLANE_Y].src.buf, x->plane[AOM_PLANE_Y].src.stride, + xd->plane[AOM_PLANE_Y].pre[0].buf, + xd->plane[AOM_PLANE_Y].pre[0].stride); + } + } + + if (use_last_ref) { + const int source_sad_nonrd = x->content_state_sb.source_sad_nonrd; + av1_setup_pre_planes( + xd, 0, yv12, mi_row, mi_col, + scaled_ref_last ? NULL : get_ref_scale_factors(cm, LAST_FRAME), + num_planes); + mi->ref_frame[0] = LAST_FRAME; + mi->ref_frame[1] = NONE_FRAME; + mi->bsize = cm->seq_params->sb_size; + mi->mv[0].as_int = 0; + mi->interp_filters = av1_broadcast_interp_filter(BILINEAR); + + int est_motion = cpi->sf.rt_sf.estimate_motion_for_var_based_partition; + // TODO(b/290596301): Look into adjusting this condition. + // There is regression on color content when + // estimate_motion_for_var_based_partition = 3 and high motion, + // so for now force it to 2 based on superblock sad. + if (est_motion > 2 && source_sad_nonrd > kMedSad) est_motion = 2; + + if (est_motion == 1 || est_motion == 2) { + if (xd->mb_to_right_edge >= 0 && xd->mb_to_bottom_edge >= 0) { + // For screen only do int_pro_motion for spatial variance above + // threshold and motion level above LowSad. + if (x->source_variance > 100 && source_sad_nonrd > kLowSad) { + int is_screen = cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN; + int me_search_size_col = + is_screen ? 96 : block_size_wide[cm->seq_params->sb_size] >> 1; + // For screen use larger search size row motion to capture + // vertical scroll, which can be larger motion. + int me_search_size_row = + is_screen ? 192 : block_size_high[cm->seq_params->sb_size] >> 1; + unsigned int y_sad_zero; + *y_sad = av1_int_pro_motion_estimation( + cpi, x, cm->seq_params->sb_size, mi_row, mi_col, &kZeroMv, + &y_sad_zero, me_search_size_col, me_search_size_row); + // The logic below selects whether the motion estimated in the + // int_pro_motion() will be used in nonrd_pickmode. Only do this + // for screen for now. + if (is_screen) { + unsigned int thresh_sad = + (cm->seq_params->sb_size == BLOCK_128X128) ? 50000 : 20000; + if (*y_sad < (y_sad_zero >> 1) && *y_sad < thresh_sad) { + x->sb_me_partition = 1; + x->sb_me_mv.as_int = mi->mv[0].as_int; + } else { + x->sb_me_partition = 0; + // Fall back to using zero motion. 
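          /* Reached when the estimated MV failed to at least halve the
           * zero-MV SAD or exceeded the absolute cap (50000 for 128x128
           * SBs, 20000 otherwise): restore the zero-MV SAD and clear the
           * motion vector. */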
+ *y_sad = y_sad_zero; + mi->mv[0].as_int = 0; + } + } + } + } + } + + if (*y_sad == UINT_MAX) { + *y_sad = cpi->ppi->fn_ptr[bsize].sdf( + x->plane[AOM_PLANE_Y].src.buf, x->plane[AOM_PLANE_Y].src.stride, + xd->plane[AOM_PLANE_Y].pre[0].buf, + xd->plane[AOM_PLANE_Y].pre[0].stride); + } + + // Evaluate if neighbours' MVs give better predictions. Zero MV is tested + // already, so only non-zero MVs are tested here. Here the neighbour blocks + // are the first block above or left to this superblock. + if (est_motion >= 2 && (xd->up_available || xd->left_available)) + evaluate_neighbour_mvs(cpi, x, y_sad, is_small_sb, est_motion); + + *y_sad_last = *y_sad; + } + + // Pick the ref frame for partitioning, use golden or altref frame only if + // its lower sad, bias to LAST with factor 0.9. + set_ref_frame_for_partition(cpi, x, xd, ref_frame_partition, mi, y_sad, + y_sad_g, y_sad_alt, yv12_g, yv12_alt, mi_row, + mi_col, num_planes); + + // Only calculate the predictor for non-zero MV. + if (mi->mv[0].as_int != 0) { + if (!scaled_ref_last) { + set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]); + } else { + xd->block_ref_scale_factors[0] = sf_no_scale; + xd->block_ref_scale_factors[1] = sf_no_scale; + } + av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, + cm->seq_params->sb_size, AOM_PLANE_Y, + num_planes - 1); + } +} + +// Decides whether to split or merge a 16x16 partition block in variance based +// partitioning based on the 8x8 sub-block variances. +static AOM_INLINE PART_EVAL_STATUS get_part_eval_based_on_sub_blk_var( + VP16x16 *var_16x16_info, int64_t threshold16) { + int max_8x8_var = 0, min_8x8_var = INT_MAX; + for (int split_idx = 0; split_idx < 4; split_idx++) { + get_variance(&var_16x16_info->split[split_idx].part_variances.none); + int this_8x8_var = + var_16x16_info->split[split_idx].part_variances.none.variance; + max_8x8_var = AOMMAX(this_8x8_var, max_8x8_var); + min_8x8_var = AOMMIN(this_8x8_var, min_8x8_var); + } + // If the difference between maximum and minimum sub-block variances is high, + // then only evaluate PARTITION_SPLIT for the 16x16 block. Otherwise, evaluate + // only PARTITION_NONE. The shift factor for threshold16 has been derived + // empirically. + return ((max_8x8_var - min_8x8_var) > (threshold16 << 2)) + ? PART_EVAL_ONLY_SPLIT + : PART_EVAL_ONLY_NONE; +} + +static AOM_INLINE bool is_set_force_zeromv_skip_based_on_src_sad( + int set_zeromv_skip_based_on_source_sad, SOURCE_SAD source_sad_nonrd) { + if (set_zeromv_skip_based_on_source_sad == 0) return false; + + if (set_zeromv_skip_based_on_source_sad >= 3) + return source_sad_nonrd <= kLowSad; + else if (set_zeromv_skip_based_on_source_sad >= 2) + return source_sad_nonrd <= kVeryLowSad; + else if (set_zeromv_skip_based_on_source_sad >= 1) + return source_sad_nonrd == kZeroSad; + + return false; +} + +static AOM_INLINE bool set_force_zeromv_skip_for_sb( + AV1_COMP *cpi, MACROBLOCK *x, const TileInfo *const tile, VP128x128 *vt, + unsigned int *uv_sad, int mi_row, int mi_col, unsigned int y_sad, + BLOCK_SIZE bsize) { + AV1_COMMON *const cm = &cpi->common; + if (!is_set_force_zeromv_skip_based_on_src_sad( + cpi->sf.rt_sf.set_zeromv_skip_based_on_source_sad, + x->content_state_sb.source_sad_nonrd)) + return false; + int shift = cpi->sf.rt_sf.increase_source_sad_thresh ? 
+
+static AOM_INLINE bool is_set_force_zeromv_skip_based_on_src_sad(
+ int set_zeromv_skip_based_on_source_sad, SOURCE_SAD source_sad_nonrd) {
+ if (set_zeromv_skip_based_on_source_sad == 0) return false;
+
+ if (set_zeromv_skip_based_on_source_sad >= 3)
+ return source_sad_nonrd <= kLowSad;
+ else if (set_zeromv_skip_based_on_source_sad >= 2)
+ return source_sad_nonrd <= kVeryLowSad;
+ else if (set_zeromv_skip_based_on_source_sad >= 1)
+ return source_sad_nonrd == kZeroSad;
+
+ return false;
+}
+
+static AOM_INLINE bool set_force_zeromv_skip_for_sb(
+ AV1_COMP *cpi, MACROBLOCK *x, const TileInfo *const tile, VP128x128 *vt,
+ unsigned int *uv_sad, int mi_row, int mi_col, unsigned int y_sad,
+ BLOCK_SIZE bsize) {
+ AV1_COMMON *const cm = &cpi->common;
+ if (!is_set_force_zeromv_skip_based_on_src_sad(
+ cpi->sf.rt_sf.set_zeromv_skip_based_on_source_sad,
+ x->content_state_sb.source_sad_nonrd))
+ return false;
+ int shift = cpi->sf.rt_sf.increase_source_sad_thresh ? 1 : 0;
+ const int block_width = mi_size_wide[cm->seq_params->sb_size];
+ const int block_height = mi_size_high[cm->seq_params->sb_size];
+ const unsigned int thresh_exit_part_y =
+ cpi->zeromv_skip_thresh_exit_part[bsize] << shift;
+ unsigned int thresh_exit_part_uv =
+ CALC_CHROMA_THRESH_FOR_ZEROMV_SKIP(thresh_exit_part_y) << shift;
+ // Be more aggressive with the UV threshold if source_sad >= kVeryLowSad,
+ // to suppress visual artifacts caused by the speed feature
+ // set_zeromv_skip_based_on_source_sad = 2. For now, only do this for
+ // part_early_exit_zeromv = 1.
+ if (x->content_state_sb.source_sad_nonrd >= kVeryLowSad &&
+ cpi->sf.rt_sf.part_early_exit_zeromv == 1)
+ thresh_exit_part_uv = thresh_exit_part_uv >> 3;
+ if (mi_col + block_width <= tile->mi_col_end &&
+ mi_row + block_height <= tile->mi_row_end && y_sad < thresh_exit_part_y &&
+ uv_sad[0] < thresh_exit_part_uv && uv_sad[1] < thresh_exit_part_uv) {
+ set_block_size(cpi, mi_row, mi_col, bsize);
+ x->force_zeromv_skip_for_sb = 1;
+ aom_free(vt);
+ // The partition shape is set here at SB level.
+ // The exit needs to happen from av1_choose_var_based_partitioning().
+ return true;
+ } else if (x->content_state_sb.source_sad_nonrd == kZeroSad &&
+ cpi->sf.rt_sf.part_early_exit_zeromv >= 2)
+ x->force_zeromv_skip_for_sb = 2;
+ return false;
+}
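+
+// Editorial note (illustrative): the gate above maps the speed-feature
+// level to the largest SOURCE_SAD class that still allows the zero-MV skip:
+//   set_zeromv_skip_based_on_source_sad == 1  ->  source_sad == kZeroSad
+//   set_zeromv_skip_based_on_source_sad == 2  ->  source_sad <= kVeryLowSad
+//   set_zeromv_skip_based_on_source_sad >= 3  ->  source_sad <= kLowSad
+// With increase_source_sad_thresh set, both exit thresholds are doubled
+// (shifted left by 1); the UV threshold is derived from the luma one via
+// CALC_CHROMA_THRESH_FOR_ZEROMV_SKIP and further divided by 8 in the more
+// aggressive case handled above.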
+
+int av1_choose_var_based_partitioning(AV1_COMP *cpi, const TileInfo *const tile,
+ ThreadData *td, MACROBLOCK *x, int mi_row,
+ int mi_col) {
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ start_timing(cpi, choose_var_based_partitioning_time);
+#endif
+ AV1_COMMON *const cm = &cpi->common;
+ MACROBLOCKD *xd = &x->e_mbd;
+ const int64_t *const vbp_thresholds = cpi->vbp_info.thresholds;
+ PART_EVAL_STATUS force_split[85];
+ int avg_64x64;
+ int max_var_32x32[4];
+ int min_var_32x32[4];
+ int var_32x32;
+ int var_64x64;
+ int min_var_64x64 = INT_MAX;
+ int max_var_64x64 = 0;
+ int avg_16x16[4][4];
+ int maxvar_16x16[4][4];
+ int minvar_16x16[4][4];
+ const uint8_t *src_buf;
+ const uint8_t *dst_buf;
+ int dst_stride;
+ unsigned int uv_sad[MAX_MB_PLANE - 1];
+ NOISE_LEVEL noise_level = kLow;
+ bool is_zero_motion = true;
+ bool scaled_ref_last = false;
+ struct scale_factors sf_no_scale;
+ av1_setup_scale_factors_for_frame(&sf_no_scale, cm->width, cm->height,
+ cm->width, cm->height);
+
+ bool is_key_frame =
+ (frame_is_intra_only(cm) ||
+ (cpi->ppi->use_svc &&
+ cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame));
+
+ assert(cm->seq_params->sb_size == BLOCK_64X64 ||
+ cm->seq_params->sb_size == BLOCK_128X128);
+ const bool is_small_sb = (cm->seq_params->sb_size == BLOCK_64X64);
+ const int num_64x64_blocks = is_small_sb ? 1 : 4;
+
+ unsigned int y_sad = UINT_MAX;
+ unsigned int y_sad_g = UINT_MAX;
+ unsigned int y_sad_alt = UINT_MAX;
+ unsigned int y_sad_last = UINT_MAX;
+ BLOCK_SIZE bsize = is_small_sb ? BLOCK_64X64 : BLOCK_128X128;
+
+ // Ref frame used in partitioning.
+ MV_REFERENCE_FRAME ref_frame_partition = LAST_FRAME;
+
+ int64_t thresholds[5] = { vbp_thresholds[0], vbp_thresholds[1],
+ vbp_thresholds[2], vbp_thresholds[3],
+ vbp_thresholds[4] };
+
+ const int segment_id = xd->mi[0]->segment_id;
+ uint64_t blk_sad = 0;
+ if (cpi->src_sad_blk_64x64 != NULL &&
+ cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1) {
+ const int sb_size_by_mb = (cm->seq_params->sb_size == BLOCK_128X128)
+ ? (cm->seq_params->mib_size >> 1)
+ : cm->seq_params->mib_size;
+ const int sb_cols =
+ (cm->mi_params.mi_cols + sb_size_by_mb - 1) / sb_size_by_mb;
+ const int sbi_col = mi_col / sb_size_by_mb;
+ const int sbi_row = mi_row / sb_size_by_mb;
+ blk_sad = cpi->src_sad_blk_64x64[sbi_col + sbi_row * sb_cols];
+ }
+
+ const bool is_segment_id_boosted =
+ cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled &&
+ cyclic_refresh_segment_id_boosted(segment_id);
+ const int qindex =
+ is_segment_id_boosted
+ ? av1_get_qindex(&cm->seg, segment_id, cm->quant_params.base_qindex)
+ : cm->quant_params.base_qindex;
+ set_vbp_thresholds(
+ cpi, thresholds, blk_sad, qindex, x->content_state_sb.low_sumdiff,
+ x->content_state_sb.source_sad_nonrd, x->content_state_sb.source_sad_rd,
+ is_segment_id_boosted, x->content_state_sb.lighting_change);
+
+ src_buf = x->plane[AOM_PLANE_Y].src.buf;
+ int src_stride = x->plane[AOM_PLANE_Y].src.stride;
+
+ // Index layout for force_split: 0 for the superblock (128x128, or 64x64
+ // for small superblocks), 1-4 for the 64x64 blocks, 5-20 for the 32x32
+ // blocks, and 21-84 for the 16x16 blocks.
+ force_split[0] = PART_EVAL_ALL;
+ memset(x->part_search_info.variance_low, 0,
+ sizeof(x->part_search_info.variance_low));
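+
+ // Editorial sketch (illustrative only, mirroring the index expressions
+ // used in the loops below): the flat force_split[] index of a block
+ // follows from its position in the tree. A hypothetical helper for the
+ // 16x16 level would be:
+ //
+ //   static int force_split_index_16x16(int blk64_idx, int lvl1_idx,
+ //                                      int lvl2_idx) {
+ //     const int blk64_scale_idx = blk64_idx << 2;
+ //     const int lvl1_scale_idx = (blk64_scale_idx + lvl1_idx) << 2;
+ //     return 21 + lvl1_scale_idx + lvl2_idx;  // Range 21..84.
+ //   }
+ //
+ // For example, (blk64_idx = 2, lvl1_idx = 1, lvl2_idx = 3) maps to
+ // 21 + ((8 + 1) << 2) + 3 = 60; the enclosing 32x32 block uses
+ // 5 + 8 + 1 = 14 and the enclosing 64x64 block uses 2 + 1 = 3.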
+
+ // Check if the LAST frame is NULL, and if so, treat this frame
+ // as a key frame for the purpose of the superblock partitioning.
+ // LAST == NULL can happen when enhancement spatial layers are
+ // enabled dynamically and the only reference is the spatial one (GOLDEN).
+ // If the LAST frame has a different resolution: set the scaled_ref_last
+ // flag and check whether the scaled reference is NULL.
+ if (!frame_is_intra_only(cm)) {
+ const YV12_BUFFER_CONFIG *ref = get_ref_frame_yv12_buf(cm, LAST_FRAME);
+ if (ref == NULL) {
+ is_key_frame = true;
+ } else if (ref->y_crop_height != cm->height ||
+ ref->y_crop_width != cm->width) {
+ scaled_ref_last = true;
+ const YV12_BUFFER_CONFIG *ref_scaled =
+ av1_get_scaled_ref_frame(cpi, LAST_FRAME);
+ if (ref_scaled == NULL) is_key_frame = true;
+ }
+ }
+
+ x->source_variance = UINT_MAX;
+ // For nonrd_pickmode: compute source_variance, for now only for
+ // superblocks with some motion. This input can then be used to bias the
+ // partitioning or the chroma_check.
+ if (cpi->sf.rt_sf.use_nonrd_pick_mode &&
+ x->content_state_sb.source_sad_nonrd > kLowSad)
+ x->source_variance = av1_get_perpixel_variance_facade(
+ cpi, xd, &x->plane[0].src, cm->seq_params->sb_size, AOM_PLANE_Y);
+
+ if (!is_key_frame) {
+ setup_planes(cpi, x, &y_sad, &y_sad_g, &y_sad_alt, &y_sad_last,
+ &ref_frame_partition, &sf_no_scale, mi_row, mi_col,
+ is_small_sb, scaled_ref_last);
+
+ MB_MODE_INFO *mi = xd->mi[0];
+ // Use the reference SB directly for zero mv.
+ if (mi->mv[0].as_int != 0) {
+ dst_buf = xd->plane[AOM_PLANE_Y].dst.buf;
+ dst_stride = xd->plane[AOM_PLANE_Y].dst.stride;
+ is_zero_motion = false;
+ } else {
+ dst_buf = xd->plane[AOM_PLANE_Y].pre[0].buf;
+ dst_stride = xd->plane[AOM_PLANE_Y].pre[0].stride;
+ }
+ } else {
+ dst_buf = NULL;
+ dst_stride = 0;
+ }
+
+ // Check and set the color sensitivity of the SB.
+ av1_zero(uv_sad);
+ chroma_check(cpi, x, bsize, y_sad_last, y_sad_g, y_sad_alt, is_key_frame,
+ is_zero_motion, uv_sad);
+
+ x->force_zeromv_skip_for_sb = 0;
+
+ VP128x128 *vt;
+ AOM_CHECK_MEM_ERROR(xd->error_info, vt, aom_malloc(sizeof(*vt)));
+ vt->split = td->vt64x64;
+
+ // If the superblock is completely static (zero source sad) and the y_sad
+ // (relative to the LAST ref) is very small, take the sb_size partition,
+ // force zeromv_last skip mode for nonrd_pickmode, and exit.
+ // Only do this on the base segment (so the QP-boosted segment, if
+ // applied, can still continue cleaning/ramping up the quality).
+ // A condition on the color uv_sad is also included.
+ if (!is_key_frame && cpi->sf.rt_sf.part_early_exit_zeromv &&
+ cpi->rc.frames_since_key > 30 && segment_id == CR_SEGMENT_ID_BASE &&
+ ref_frame_partition == LAST_FRAME && xd->mi[0]->mv[0].as_int == 0) {
+ // Exit here if the zero mv skip flag is set at SB level.
+ if (set_force_zeromv_skip_for_sb(cpi, x, tile, vt, uv_sad, mi_row, mi_col,
+ y_sad, bsize))
+ return 0;
+ }
+
+ if (cpi->noise_estimate.enabled)
+ noise_level = av1_noise_estimate_extract_level(&cpi->noise_estimate);
+
+ // Fill in the entire tree of 8x8 (for inter frames) or 4x4 (for key
+ // frames) variances for splits.
+ fill_variance_tree_leaves(cpi, x, vt, force_split, avg_16x16, maxvar_16x16,
+ minvar_16x16, thresholds, src_buf, src_stride,
+ dst_buf, dst_stride, is_key_frame, is_small_sb);
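+
+ // Editorial note (illustrative): in the loop below the 16x16 level is
+ // refilled only for key frames (fill_variance_tree_leaves above already
+ // receives force_split and handles the inter-frame case). A single 16x16
+ // block whose variance exceeds thresholds[3] forces PARTITION_SPLIT at
+ // every enclosing level at once: its own entry (21 + ...), its 32x32
+ // parent (5 + ...), its 64x64 parent (blk64_idx + 1), and the superblock
+ // root (index 0).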
+
+ avg_64x64 = 0;
+ for (int blk64_idx = 0; blk64_idx < num_64x64_blocks; ++blk64_idx) {
+ max_var_32x32[blk64_idx] = 0;
+ min_var_32x32[blk64_idx] = INT_MAX;
+ const int blk64_scale_idx = blk64_idx << 2;
+ for (int lvl1_idx = 0; lvl1_idx < 4; lvl1_idx++) {
+ const int lvl1_scale_idx = (blk64_scale_idx + lvl1_idx) << 2;
+ for (int lvl2_idx = 0; lvl2_idx < 4; lvl2_idx++) {
+ if (!is_key_frame) continue;
+ VP16x16 *vtemp = &vt->split[blk64_idx].split[lvl1_idx].split[lvl2_idx];
+ for (int lvl3_idx = 0; lvl3_idx < 4; lvl3_idx++)
+ fill_variance_tree(&vtemp->split[lvl3_idx], BLOCK_8X8);
+ fill_variance_tree(vtemp, BLOCK_16X16);
+ // If the variance of this 16x16 block is above the threshold, force
+ // the block to split. This also forces a split on the upper levels.
+ get_variance(&vtemp->part_variances.none);
+ if (vtemp->part_variances.none.variance > thresholds[3]) {
+ const int split_index = 21 + lvl1_scale_idx + lvl2_idx;
+ force_split[split_index] =
+ cpi->sf.rt_sf.vbp_prune_16x16_split_using_min_max_sub_blk_var
+ ? get_part_eval_based_on_sub_blk_var(vtemp, thresholds[3])
+ : PART_EVAL_ONLY_SPLIT;
+ force_split[5 + blk64_scale_idx + lvl1_idx] = PART_EVAL_ONLY_SPLIT;
+ force_split[blk64_idx + 1] = PART_EVAL_ONLY_SPLIT;
+ force_split[0] = PART_EVAL_ONLY_SPLIT;
+ }
+ }
+ fill_variance_tree(&vt->split[blk64_idx].split[lvl1_idx], BLOCK_32X32);
+ // If the variance of this 32x32 block is above the threshold, or above
+ // (some fraction of) the average variance over the sub-16x16 blocks,
+ // then force this block to split. This also forces a split on the upper
+ // (64x64) level.
+ uint64_t frame_sad_thresh = 20000;
+ const int is_360p_or_smaller = cm->width * cm->height <= RESOLUTION_360P;
+ if (cpi->svc.number_temporal_layers > 2 &&
+ cpi->svc.temporal_layer_id == 0)
+ frame_sad_thresh = frame_sad_thresh << 1;
+ if (force_split[5 + blk64_scale_idx + lvl1_idx] == PART_EVAL_ALL) {
+ get_variance(&vt->split[blk64_idx].split[lvl1_idx].part_variances.none);
+ var_32x32 =
+ vt->split[blk64_idx].split[lvl1_idx].part_variances.none.variance;
+ max_var_32x32[blk64_idx] = AOMMAX(var_32x32, max_var_32x32[blk64_idx]);
+ min_var_32x32[blk64_idx] = AOMMIN(var_32x32, min_var_32x32[blk64_idx]);
+ const int max_min_var_16X16_diff = (maxvar_16x16[blk64_idx][lvl1_idx] -
+ minvar_16x16[blk64_idx][lvl1_idx]);
+
+ if (var_32x32 > thresholds[2] ||
+ (!is_key_frame && var_32x32 > (thresholds[2] >> 1) &&
+ var_32x32 > (avg_16x16[blk64_idx][lvl1_idx] >> 1))) {
+ force_split[5 + blk64_scale_idx + lvl1_idx] = PART_EVAL_ONLY_SPLIT;
+ force_split[blk64_idx + 1] = PART_EVAL_ONLY_SPLIT;
+ force_split[0] = PART_EVAL_ONLY_SPLIT;
+ } else if (!is_key_frame && is_360p_or_smaller &&
+ ((max_min_var_16X16_diff > (thresholds[2] >> 1) &&
+ maxvar_16x16[blk64_idx][lvl1_idx] > thresholds[2]) ||
+ (cpi->sf.rt_sf.prefer_large_partition_blocks &&
+ x->content_state_sb.source_sad_nonrd > kLowSad &&
+ cpi->rc.frame_source_sad < frame_sad_thresh &&
+ maxvar_16x16[blk64_idx][lvl1_idx] > (thresholds[2] >> 4) &&
+ maxvar_16x16[blk64_idx][lvl1_idx] >
+ (minvar_16x16[blk64_idx][lvl1_idx] << 2)))) {
+ force_split[5 + blk64_scale_idx + lvl1_idx] = PART_EVAL_ONLY_SPLIT;
+ force_split[blk64_idx + 1] = PART_EVAL_ONLY_SPLIT;
+ force_split[0] = PART_EVAL_ONLY_SPLIT;
+ }
+ }
+ }
+ if (force_split[1 + blk64_idx] == PART_EVAL_ALL) {
+ fill_variance_tree(&vt->split[blk64_idx], BLOCK_64X64);
+ get_variance(&vt->split[blk64_idx].part_variances.none);
+ var_64x64 = vt->split[blk64_idx].part_variances.none.variance;
+ max_var_64x64 = AOMMAX(var_64x64, max_var_64x64);
+ min_var_64x64 = AOMMIN(var_64x64, min_var_64x64);
+ // If the spread between the max and min variances of the 32x32
+ // sub-blocks is above some threshold and the max variance is also
+ // large, force this block to split. Only check this when the noise
+ // level is at least kMedium, the encoder is in SVC mode, or the
+ // prefer_large_partition_blocks speed feature is enabled.
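+
+ // Illustrative example (editorial, hypothetical numbers): with
+ // thresholds[1] = 8000, the 64x64 check above forces a split on inter
+ // frames when the 32x32 variance spread exceeds 3 * (8000 >> 3) = 3000
+ // and the largest 32x32 variance exceeds 8000 >> 1 = 4000, provided the
+ // noise level is at least kMedium, SVC is in use, or
+ // prefer_large_partition_blocks is set.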
+ const int max_min_var_32x32_diff =
+ max_var_32x32[blk64_idx] - min_var_32x32[blk64_idx];
+ const int check_max_var = max_var_32x32[blk64_idx] > thresholds[1] >> 1;
+ const bool check_noise_lvl = noise_level >= kMedium ||
+ cpi->ppi->use_svc ||
+ cpi->sf.rt_sf.prefer_large_partition_blocks;
+ const int64_t set_threshold = 3 * (thresholds[1] >> 3);
+
+ if (!is_key_frame && max_min_var_32x32_diff > set_threshold &&
+ check_max_var && check_noise_lvl) {
+ force_split[1 + blk64_idx] = PART_EVAL_ONLY_SPLIT;
+ force_split[0] = PART_EVAL_ONLY_SPLIT;
+ }
+ avg_64x64 += var_64x64;
+ }
+ if (is_small_sb) force_split[0] = PART_EVAL_ONLY_SPLIT;
+ }
+
+ if (force_split[0] == PART_EVAL_ALL) {
+ fill_variance_tree(vt, BLOCK_128X128);
+ get_variance(&vt->part_variances.none);
+ const int set_avg_64x64 = (9 * avg_64x64) >> 5;
+ if (!is_key_frame && vt->part_variances.none.variance > set_avg_64x64)
+ force_split[0] = PART_EVAL_ONLY_SPLIT;
+
+ if (!is_key_frame &&
+ (max_var_64x64 - min_var_64x64) > 3 * (thresholds[0] >> 3) &&
+ max_var_64x64 > thresholds[0] >> 1)
+ force_split[0] = PART_EVAL_ONLY_SPLIT;
+ }
+
+ if (mi_col + 32 > tile->mi_col_end || mi_row + 32 > tile->mi_row_end ||
+ !set_vt_partitioning(cpi, xd, tile, vt, BLOCK_128X128, mi_row, mi_col,
+ thresholds[0], BLOCK_16X16, force_split[0])) {
+ for (int blk64_idx = 0; blk64_idx < num_64x64_blocks; ++blk64_idx) {
+ const int x64_idx = GET_BLK_IDX_X(blk64_idx, 4);
+ const int y64_idx = GET_BLK_IDX_Y(blk64_idx, 4);
+ const int blk64_scale_idx = blk64_idx << 2;
+
+ // Now go through the entire structure, splitting every block size
+ // until we get to one that has a variance lower than our threshold.
+ if (set_vt_partitioning(cpi, xd, tile, &vt->split[blk64_idx], BLOCK_64X64,
+ mi_row + y64_idx, mi_col + x64_idx, thresholds[1],
+ BLOCK_16X16, force_split[1 + blk64_idx]))
+ continue;
+ for (int lvl1_idx = 0; lvl1_idx < 4; ++lvl1_idx) {
+ const int x32_idx = GET_BLK_IDX_X(lvl1_idx, 3);
+ const int y32_idx = GET_BLK_IDX_Y(lvl1_idx, 3);
+ const int lvl1_scale_idx = (blk64_scale_idx + lvl1_idx) << 2;
+ if (set_vt_partitioning(
+ cpi, xd, tile, &vt->split[blk64_idx].split[lvl1_idx],
+ BLOCK_32X32, (mi_row + y64_idx + y32_idx),
+ (mi_col + x64_idx + x32_idx), thresholds[2], BLOCK_16X16,
+ force_split[5 + blk64_scale_idx + lvl1_idx]))
+ continue;
+ for (int lvl2_idx = 0; lvl2_idx < 4; ++lvl2_idx) {
+ const int x16_idx = GET_BLK_IDX_X(lvl2_idx, 2);
+ const int y16_idx = GET_BLK_IDX_Y(lvl2_idx, 2);
+ const int split_index = 21 + lvl1_scale_idx + lvl2_idx;
+ VP16x16 *vtemp =
+ &vt->split[blk64_idx].split[lvl1_idx].split[lvl2_idx];
+ if (set_vt_partitioning(cpi, xd, tile, vtemp, BLOCK_16X16,
+ mi_row + y64_idx + y32_idx + y16_idx,
+ mi_col + x64_idx + x32_idx + x16_idx,
+ thresholds[3], BLOCK_8X8,
+ force_split[split_index]))
+ continue;
+ for (int lvl3_idx = 0; lvl3_idx < 4; ++lvl3_idx) {
+ const int x8_idx = GET_BLK_IDX_X(lvl3_idx, 1);
+ const int y8_idx = GET_BLK_IDX_Y(lvl3_idx, 1);
+ set_block_size(cpi, (mi_row + y64_idx + y32_idx + y16_idx + y8_idx),
+ (mi_col + x64_idx + x32_idx + x16_idx + x8_idx),
+ BLOCK_8X8);
+ }
+ }
+ }
+ }
+ }
+
+ if (cpi->sf.rt_sf.short_circuit_low_temp_var) {
+ set_low_temp_var_flag(cpi, &x->part_search_info, xd, vt, thresholds,
+ ref_frame_partition, mi_col, mi_row, is_small_sb);
+ }
+
+ aom_free(vt);
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ end_timing(cpi, choose_var_based_partitioning_time);
+#endif
+ return 0;
+}