From d8bbc7858622b6d9c278469aab701ca0b609cddf Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 15 May 2024 05:35:49 +0200 Subject: Merging upstream version 126.0. Signed-off-by: Daniel Baumann --- third_party/aom/av1/encoder/pickrst.c | 111 ++++++++++++++++++++-------------- 1 file changed, 64 insertions(+), 47 deletions(-) (limited to 'third_party/aom/av1/encoder/pickrst.c') diff --git a/third_party/aom/av1/encoder/pickrst.c b/third_party/aom/av1/encoder/pickrst.c index 6429064175..b0d0d0bb78 100644 --- a/third_party/aom/av1/encoder/pickrst.c +++ b/third_party/aom/av1/encoder/pickrst.c @@ -1103,6 +1103,39 @@ static INLINE int wrap_index(int i, int wiener_win) { return (i >= wiener_halfwin1 ? wiener_win - 1 - i : i); } +// Splits each w[i] into smaller components w1[i] and w2[i] such that +// w[i] = w1[i] * WIENER_TAP_SCALE_FACTOR + w2[i]. +static INLINE void split_wiener_filter_coefficients(int wiener_win, + const int32_t *w, + int32_t *w1, int32_t *w2) { + for (int i = 0; i < wiener_win; i++) { + w1[i] = w[i] / WIENER_TAP_SCALE_FACTOR; + w2[i] = w[i] - w1[i] * WIENER_TAP_SCALE_FACTOR; + assert(w[i] == w1[i] * WIENER_TAP_SCALE_FACTOR + w2[i]); + } +} + +// Calculates x * w / WIENER_TAP_SCALE_FACTOR, where +// w = w1 * WIENER_TAP_SCALE_FACTOR + w2. +// +// The multiplication x * w may overflow, so we multiply x by the components of +// w (w1 and w2) and combine the multiplication with the division. +static INLINE int64_t multiply_and_scale(int64_t x, int32_t w1, int32_t w2) { + // Let y = x * w / WIENER_TAP_SCALE_FACTOR + // = x * (w1 * WIENER_TAP_SCALE_FACTOR + w2) / WIENER_TAP_SCALE_FACTOR + const int64_t y = x * w1 + x * w2 / WIENER_TAP_SCALE_FACTOR; + // Double-check the calculation using __int128. + // TODO(wtc): Remove after 2024-04-30. +#if !defined(NDEBUG) && defined(__GNUC__) && defined(__LP64__) + const int32_t w = w1 * WIENER_TAP_SCALE_FACTOR + w2; + const __int128 z = (__int128)x * w / WIENER_TAP_SCALE_FACTOR; + assert(z >= INT64_MIN); + assert(z <= INT64_MAX); + assert(y == (int64_t)z); +#endif + return y; +} + // Solve linear equations to find Wiener filter tap values // Taps are output scaled by WIENER_FILT_STEP static int linsolve_wiener(int n, int64_t *A, int stride, int64_t *b, @@ -1175,10 +1208,12 @@ static int linsolve_wiener(int n, int64_t *A, int stride, int64_t *b, // Fix vector b, update vector a static AOM_INLINE void update_a_sep_sym(int wiener_win, int64_t **Mc, - int64_t **Hc, int32_t *a, int32_t *b) { + int64_t **Hc, int32_t *a, + const int32_t *b) { int i, j; int64_t S[WIENER_WIN]; int64_t A[WIENER_HALFWIN1], B[WIENER_HALFWIN1 * WIENER_HALFWIN1]; + int32_t b1[WIENER_WIN], b2[WIENER_WIN]; const int wiener_win2 = wiener_win * wiener_win; const int wiener_halfwin1 = (wiener_win >> 1) + 1; memset(A, 0, sizeof(A)); @@ -1189,16 +1224,7 @@ static AOM_INLINE void update_a_sep_sym(int wiener_win, int64_t **Mc, A[jj] += Mc[i][j] * b[i] / WIENER_TAP_SCALE_FACTOR; } } - - // b/274668506: This is the dual branch for the issue in b/272139363. The fix - // is similar. See comments in update_b_sep_sym() below. - int32_t max_b_l = 0; - for (int l = 0; l < wiener_win; ++l) { - const int32_t abs_b_l = abs(b[l]); - if (abs_b_l > max_b_l) max_b_l = abs_b_l; - } - const int scale_threshold = 128 * WIENER_TAP_SCALE_FACTOR; - const int scaler = max_b_l < scale_threshold ? 1 : 4; + split_wiener_filter_coefficients(wiener_win, b, b1, b2); for (i = 0; i < wiener_win; i++) { for (j = 0; j < wiener_win; j++) { @@ -1207,10 +1233,17 @@ static AOM_INLINE void update_a_sep_sym(int wiener_win, int64_t **Mc, const int kk = wrap_index(k, wiener_win); for (l = 0; l < wiener_win; ++l) { const int ll = wrap_index(l, wiener_win); - B[ll * wiener_halfwin1 + kk] += - Hc[j * wiener_win + i][k * wiener_win2 + l] * b[i] / - (scaler * WIENER_TAP_SCALE_FACTOR) * b[j] / - (WIENER_TAP_SCALE_FACTOR / scaler); + // Calculate + // B[ll * wiener_halfwin1 + kk] += + // Hc[j * wiener_win + i][k * wiener_win2 + l] * b[i] / + // WIENER_TAP_SCALE_FACTOR * b[j] / WIENER_TAP_SCALE_FACTOR; + // + // The last multiplication may overflow, so we combine the last + // multiplication with the last division. + const int64_t x = Hc[j * wiener_win + i][k * wiener_win2 + l] * b[i] / + WIENER_TAP_SCALE_FACTOR; + // b[j] = b1[j] * WIENER_TAP_SCALE_FACTOR + b2[j] + B[ll * wiener_halfwin1 + kk] += multiply_and_scale(x, b1[j], b2[j]); } } } @@ -1246,10 +1279,12 @@ static AOM_INLINE void update_a_sep_sym(int wiener_win, int64_t **Mc, // Fix vector a, update vector b static AOM_INLINE void update_b_sep_sym(int wiener_win, int64_t **Mc, - int64_t **Hc, int32_t *a, int32_t *b) { + int64_t **Hc, const int32_t *a, + int32_t *b) { int i, j; int64_t S[WIENER_WIN]; int64_t A[WIENER_HALFWIN1], B[WIENER_HALFWIN1 * WIENER_HALFWIN1]; + int32_t a1[WIENER_WIN], a2[WIENER_WIN]; const int wiener_win2 = wiener_win * wiener_win; const int wiener_halfwin1 = (wiener_win >> 1) + 1; memset(A, 0, sizeof(A)); @@ -1260,32 +1295,7 @@ static AOM_INLINE void update_b_sep_sym(int wiener_win, int64_t **Mc, A[ii] += Mc[i][j] * a[j] / WIENER_TAP_SCALE_FACTOR; } } - - // b/272139363: The computation, - // Hc[i * wiener_win + j][k * wiener_win2 + l] * a[k] / - // WIENER_TAP_SCALE_FACTOR * a[l] / WIENER_TAP_SCALE_FACTOR; - // may generate a signed-integer-overflow. Conditionally scale the terms to - // avoid a potential overflow. - // - // Hc contains accumulated correlation statistics and it is desired to leave - // as much room as possible for Hc. It was experimentally observed that the - // primary issue manifests itself with the second, a[l], multiply. For - // max_a_l < WIENER_TAP_SCALE_FACTOR the first multiply with a[k] should not - // increase dynamic range and the second multiply should hence be safe. - // Thereafter a safe scale_threshold depends on the actual operational range - // of Hc. The largest scale_threshold is expected to depend on bit-depth - // (av1_compute_stats_highbd_c() scales highbd to 8-bit) and maximum - // restoration-unit size (256), leading up to 32-bit positive numbers in Hc. - // Noting that the caller, wiener_decompose_sep_sym(), initializes a[...] - // to a range smaller than 16 bits, the scale_threshold is set as below for - // convenience. - int32_t max_a_l = 0; - for (int l = 0; l < wiener_win; ++l) { - const int32_t abs_a_l = abs(a[l]); - if (abs_a_l > max_a_l) max_a_l = abs_a_l; - } - const int scale_threshold = 128 * WIENER_TAP_SCALE_FACTOR; - const int scaler = max_a_l < scale_threshold ? 1 : 4; + split_wiener_filter_coefficients(wiener_win, a, a1, a2); for (i = 0; i < wiener_win; i++) { const int ii = wrap_index(i, wiener_win); @@ -1294,10 +1304,17 @@ static AOM_INLINE void update_b_sep_sym(int wiener_win, int64_t **Mc, int k, l; for (k = 0; k < wiener_win; ++k) { for (l = 0; l < wiener_win; ++l) { - B[jj * wiener_halfwin1 + ii] += - Hc[i * wiener_win + j][k * wiener_win2 + l] * a[k] / - (scaler * WIENER_TAP_SCALE_FACTOR) * a[l] / - (WIENER_TAP_SCALE_FACTOR / scaler); + // Calculate + // B[jj * wiener_halfwin1 + ii] += + // Hc[i * wiener_win + j][k * wiener_win2 + l] * a[k] / + // WIENER_TAP_SCALE_FACTOR * a[l] / WIENER_TAP_SCALE_FACTOR; + // + // The last multiplication may overflow, so we combine the last + // multiplication with the last division. + const int64_t x = Hc[i * wiener_win + j][k * wiener_win2 + l] * a[k] / + WIENER_TAP_SCALE_FACTOR; + // a[l] = a1[l] * WIENER_TAP_SCALE_FACTOR + a2[l] + B[jj * wiener_halfwin1 + ii] += multiply_and_scale(x, a1[l], a2[l]); } } } @@ -2050,7 +2067,7 @@ void av1_pick_filter_restoration(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi) { &cpi->trial_frame_rst, cm->superres_upscaled_width, cm->superres_upscaled_height, seq_params->subsampling_x, seq_params->subsampling_y, highbd, AOM_RESTORATION_FRAME_BORDER, - cm->features.byte_alignment, NULL, NULL, NULL, 0, 0)) + cm->features.byte_alignment, NULL, NULL, NULL, false, 0)) aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR, "Failed to allocate trial restored frame buffer"); -- cgit v1.2.3