summaryrefslogtreecommitdiffstats
path: root/third_party/aom/av1/encoder/pickrst.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--third_party/aom/av1/encoder/pickrst.c111
1 files changed, 64 insertions, 47 deletions
diff --git a/third_party/aom/av1/encoder/pickrst.c b/third_party/aom/av1/encoder/pickrst.c
index 6429064175..b0d0d0bb78 100644
--- a/third_party/aom/av1/encoder/pickrst.c
+++ b/third_party/aom/av1/encoder/pickrst.c
@@ -1103,6 +1103,39 @@ static INLINE int wrap_index(int i, int wiener_win) {
return (i >= wiener_halfwin1 ? wiener_win - 1 - i : i);
}
+// Splits each w[i] into smaller components w1[i] and w2[i] such that
+// w[i] = w1[i] * WIENER_TAP_SCALE_FACTOR + w2[i].
+static INLINE void split_wiener_filter_coefficients(int wiener_win,
+ const int32_t *w,
+ int32_t *w1, int32_t *w2) {
+ for (int i = 0; i < wiener_win; i++) {
+ w1[i] = w[i] / WIENER_TAP_SCALE_FACTOR;
+ w2[i] = w[i] - w1[i] * WIENER_TAP_SCALE_FACTOR;
+ assert(w[i] == w1[i] * WIENER_TAP_SCALE_FACTOR + w2[i]);
+ }
+}
+
+// Calculates x * w / WIENER_TAP_SCALE_FACTOR, where
+// w = w1 * WIENER_TAP_SCALE_FACTOR + w2.
+//
+// The multiplication x * w may overflow, so we multiply x by the components of
+// w (w1 and w2) and combine the multiplication with the division.
+static INLINE int64_t multiply_and_scale(int64_t x, int32_t w1, int32_t w2) {
+ // Let y = x * w / WIENER_TAP_SCALE_FACTOR
+ // = x * (w1 * WIENER_TAP_SCALE_FACTOR + w2) / WIENER_TAP_SCALE_FACTOR
+ const int64_t y = x * w1 + x * w2 / WIENER_TAP_SCALE_FACTOR;
+ // Double-check the calculation using __int128.
+ // TODO(wtc): Remove after 2024-04-30.
+#if !defined(NDEBUG) && defined(__GNUC__) && defined(__LP64__)
+ const int32_t w = w1 * WIENER_TAP_SCALE_FACTOR + w2;
+ const __int128 z = (__int128)x * w / WIENER_TAP_SCALE_FACTOR;
+ assert(z >= INT64_MIN);
+ assert(z <= INT64_MAX);
+ assert(y == (int64_t)z);
+#endif
+ return y;
+}
+
// Solve linear equations to find Wiener filter tap values
// Taps are output scaled by WIENER_FILT_STEP
static int linsolve_wiener(int n, int64_t *A, int stride, int64_t *b,
@@ -1175,10 +1208,12 @@ static int linsolve_wiener(int n, int64_t *A, int stride, int64_t *b,
// Fix vector b, update vector a
static AOM_INLINE void update_a_sep_sym(int wiener_win, int64_t **Mc,
- int64_t **Hc, int32_t *a, int32_t *b) {
+ int64_t **Hc, int32_t *a,
+ const int32_t *b) {
int i, j;
int64_t S[WIENER_WIN];
int64_t A[WIENER_HALFWIN1], B[WIENER_HALFWIN1 * WIENER_HALFWIN1];
+ int32_t b1[WIENER_WIN], b2[WIENER_WIN];
const int wiener_win2 = wiener_win * wiener_win;
const int wiener_halfwin1 = (wiener_win >> 1) + 1;
memset(A, 0, sizeof(A));
@@ -1189,16 +1224,7 @@ static AOM_INLINE void update_a_sep_sym(int wiener_win, int64_t **Mc,
A[jj] += Mc[i][j] * b[i] / WIENER_TAP_SCALE_FACTOR;
}
}
-
- // b/274668506: This is the dual branch for the issue in b/272139363. The fix
- // is similar. See comments in update_b_sep_sym() below.
- int32_t max_b_l = 0;
- for (int l = 0; l < wiener_win; ++l) {
- const int32_t abs_b_l = abs(b[l]);
- if (abs_b_l > max_b_l) max_b_l = abs_b_l;
- }
- const int scale_threshold = 128 * WIENER_TAP_SCALE_FACTOR;
- const int scaler = max_b_l < scale_threshold ? 1 : 4;
+ split_wiener_filter_coefficients(wiener_win, b, b1, b2);
for (i = 0; i < wiener_win; i++) {
for (j = 0; j < wiener_win; j++) {
@@ -1207,10 +1233,17 @@ static AOM_INLINE void update_a_sep_sym(int wiener_win, int64_t **Mc,
const int kk = wrap_index(k, wiener_win);
for (l = 0; l < wiener_win; ++l) {
const int ll = wrap_index(l, wiener_win);
- B[ll * wiener_halfwin1 + kk] +=
- Hc[j * wiener_win + i][k * wiener_win2 + l] * b[i] /
- (scaler * WIENER_TAP_SCALE_FACTOR) * b[j] /
- (WIENER_TAP_SCALE_FACTOR / scaler);
+ // Calculate
+ // B[ll * wiener_halfwin1 + kk] +=
+ // Hc[j * wiener_win + i][k * wiener_win2 + l] * b[i] /
+ // WIENER_TAP_SCALE_FACTOR * b[j] / WIENER_TAP_SCALE_FACTOR;
+ //
+ // The last multiplication may overflow, so we combine the last
+ // multiplication with the last division.
+ const int64_t x = Hc[j * wiener_win + i][k * wiener_win2 + l] * b[i] /
+ WIENER_TAP_SCALE_FACTOR;
+ // b[j] = b1[j] * WIENER_TAP_SCALE_FACTOR + b2[j]
+ B[ll * wiener_halfwin1 + kk] += multiply_and_scale(x, b1[j], b2[j]);
}
}
}
@@ -1246,10 +1279,12 @@ static AOM_INLINE void update_a_sep_sym(int wiener_win, int64_t **Mc,
// Fix vector a, update vector b
static AOM_INLINE void update_b_sep_sym(int wiener_win, int64_t **Mc,
- int64_t **Hc, int32_t *a, int32_t *b) {
+ int64_t **Hc, const int32_t *a,
+ int32_t *b) {
int i, j;
int64_t S[WIENER_WIN];
int64_t A[WIENER_HALFWIN1], B[WIENER_HALFWIN1 * WIENER_HALFWIN1];
+ int32_t a1[WIENER_WIN], a2[WIENER_WIN];
const int wiener_win2 = wiener_win * wiener_win;
const int wiener_halfwin1 = (wiener_win >> 1) + 1;
memset(A, 0, sizeof(A));
@@ -1260,32 +1295,7 @@ static AOM_INLINE void update_b_sep_sym(int wiener_win, int64_t **Mc,
A[ii] += Mc[i][j] * a[j] / WIENER_TAP_SCALE_FACTOR;
}
}
-
- // b/272139363: The computation,
- // Hc[i * wiener_win + j][k * wiener_win2 + l] * a[k] /
- // WIENER_TAP_SCALE_FACTOR * a[l] / WIENER_TAP_SCALE_FACTOR;
- // may generate a signed-integer-overflow. Conditionally scale the terms to
- // avoid a potential overflow.
- //
- // Hc contains accumulated correlation statistics and it is desired to leave
- // as much room as possible for Hc. It was experimentally observed that the
- // primary issue manifests itself with the second, a[l], multiply. For
- // max_a_l < WIENER_TAP_SCALE_FACTOR the first multiply with a[k] should not
- // increase dynamic range and the second multiply should hence be safe.
- // Thereafter a safe scale_threshold depends on the actual operational range
- // of Hc. The largest scale_threshold is expected to depend on bit-depth
- // (av1_compute_stats_highbd_c() scales highbd to 8-bit) and maximum
- // restoration-unit size (256), leading up to 32-bit positive numbers in Hc.
- // Noting that the caller, wiener_decompose_sep_sym(), initializes a[...]
- // to a range smaller than 16 bits, the scale_threshold is set as below for
- // convenience.
- int32_t max_a_l = 0;
- for (int l = 0; l < wiener_win; ++l) {
- const int32_t abs_a_l = abs(a[l]);
- if (abs_a_l > max_a_l) max_a_l = abs_a_l;
- }
- const int scale_threshold = 128 * WIENER_TAP_SCALE_FACTOR;
- const int scaler = max_a_l < scale_threshold ? 1 : 4;
+ split_wiener_filter_coefficients(wiener_win, a, a1, a2);
for (i = 0; i < wiener_win; i++) {
const int ii = wrap_index(i, wiener_win);
@@ -1294,10 +1304,17 @@ static AOM_INLINE void update_b_sep_sym(int wiener_win, int64_t **Mc,
int k, l;
for (k = 0; k < wiener_win; ++k) {
for (l = 0; l < wiener_win; ++l) {
- B[jj * wiener_halfwin1 + ii] +=
- Hc[i * wiener_win + j][k * wiener_win2 + l] * a[k] /
- (scaler * WIENER_TAP_SCALE_FACTOR) * a[l] /
- (WIENER_TAP_SCALE_FACTOR / scaler);
+ // Calculate
+ // B[jj * wiener_halfwin1 + ii] +=
+ // Hc[i * wiener_win + j][k * wiener_win2 + l] * a[k] /
+ // WIENER_TAP_SCALE_FACTOR * a[l] / WIENER_TAP_SCALE_FACTOR;
+ //
+ // The last multiplication may overflow, so we combine the last
+ // multiplication with the last division.
+ const int64_t x = Hc[i * wiener_win + j][k * wiener_win2 + l] * a[k] /
+ WIENER_TAP_SCALE_FACTOR;
+ // a[l] = a1[l] * WIENER_TAP_SCALE_FACTOR + a2[l]
+ B[jj * wiener_halfwin1 + ii] += multiply_and_scale(x, a1[l], a2[l]);
}
}
}
@@ -2050,7 +2067,7 @@ void av1_pick_filter_restoration(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi) {
&cpi->trial_frame_rst, cm->superres_upscaled_width,
cm->superres_upscaled_height, seq_params->subsampling_x,
seq_params->subsampling_y, highbd, AOM_RESTORATION_FRAME_BORDER,
- cm->features.byte_alignment, NULL, NULL, NULL, 0, 0))
+ cm->features.byte_alignment, NULL, NULL, NULL, false, 0))
aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
"Failed to allocate trial restored frame buffer");