summaryrefslogtreecommitdiffstats
path: root/media/libwebp/sharpyuv/sharpyuv.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--media/libwebp/sharpyuv/sharpyuv.c498
1 files changed, 498 insertions, 0 deletions
diff --git a/media/libwebp/sharpyuv/sharpyuv.c b/media/libwebp/sharpyuv/sharpyuv.c
new file mode 100644
index 0000000000..8b3ab7216b
--- /dev/null
+++ b/media/libwebp/sharpyuv/sharpyuv.c
@@ -0,0 +1,498 @@
+// Copyright 2022 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Sharp RGB to YUV conversion.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "sharpyuv/sharpyuv.h"
+
+#include <assert.h>
+#include <limits.h>
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "src/webp/types.h"
+#include "src/dsp/cpu.h"
+#include "sharpyuv/sharpyuv_dsp.h"
+#include "sharpyuv/sharpyuv_gamma.h"
+
+//------------------------------------------------------------------------------
+// Sharp RGB->YUV conversion
+
+static const int kNumIterations = 4;
+
+#define YUV_FIX 16 // fixed-point precision for RGB->YUV
+static const int kYuvHalf = 1 << (YUV_FIX - 1);
+
+// Max bit depth so that intermediate calculations fit in 16 bits.
+static const int kMaxBitDepth = 14;
+
+// Returns the precision shift to use based on the input rgb_bit_depth.
+static int GetPrecisionShift(int rgb_bit_depth) {
+ // Try to add 2 bits of precision if it fits in kMaxBitDepth. Otherwise remove
+ // bits if needed.
+ return ((rgb_bit_depth + 2) <= kMaxBitDepth) ? 2
+ : (kMaxBitDepth - rgb_bit_depth);
+}
+
+typedef int16_t fixed_t; // signed type with extra precision for UV
+typedef uint16_t fixed_y_t; // unsigned type with extra precision for W
+
+//------------------------------------------------------------------------------
+
+static uint8_t clip_8b(fixed_t v) {
+ return (!(v & ~0xff)) ? (uint8_t)v : (v < 0) ? 0u : 255u;
+}
+
+static uint16_t clip(fixed_t v, int max) {
+ return (v < 0) ? 0 : (v > max) ? max : (uint16_t)v;
+}
+
+static fixed_y_t clip_bit_depth(int y, int bit_depth) {
+ const int max = (1 << bit_depth) - 1;
+ return (!(y & ~max)) ? (fixed_y_t)y : (y < 0) ? 0 : max;
+}
+
+//------------------------------------------------------------------------------
+
+static int RGBToGray(int64_t r, int64_t g, int64_t b) {
+ const int64_t luma = 13933 * r + 46871 * g + 4732 * b + kYuvHalf;
+ return (int)(luma >> YUV_FIX);
+}
+
+static uint32_t ScaleDown(uint16_t a, uint16_t b, uint16_t c, uint16_t d,
+ int rgb_bit_depth) {
+ const int bit_depth = rgb_bit_depth + GetPrecisionShift(rgb_bit_depth);
+ const uint32_t A = SharpYuvGammaToLinear(a, bit_depth);
+ const uint32_t B = SharpYuvGammaToLinear(b, bit_depth);
+ const uint32_t C = SharpYuvGammaToLinear(c, bit_depth);
+ const uint32_t D = SharpYuvGammaToLinear(d, bit_depth);
+ return SharpYuvLinearToGamma((A + B + C + D + 2) >> 2, bit_depth);
+}
+
+static WEBP_INLINE void UpdateW(const fixed_y_t* src, fixed_y_t* dst, int w,
+ int rgb_bit_depth) {
+ const int bit_depth = rgb_bit_depth + GetPrecisionShift(rgb_bit_depth);
+ int i;
+ for (i = 0; i < w; ++i) {
+ const uint32_t R = SharpYuvGammaToLinear(src[0 * w + i], bit_depth);
+ const uint32_t G = SharpYuvGammaToLinear(src[1 * w + i], bit_depth);
+ const uint32_t B = SharpYuvGammaToLinear(src[2 * w + i], bit_depth);
+ const uint32_t Y = RGBToGray(R, G, B);
+ dst[i] = (fixed_y_t)SharpYuvLinearToGamma(Y, bit_depth);
+ }
+}
+
+static void UpdateChroma(const fixed_y_t* src1, const fixed_y_t* src2,
+ fixed_t* dst, int uv_w, int rgb_bit_depth) {
+ int i;
+ for (i = 0; i < uv_w; ++i) {
+ const int r =
+ ScaleDown(src1[0 * uv_w + 0], src1[0 * uv_w + 1], src2[0 * uv_w + 0],
+ src2[0 * uv_w + 1], rgb_bit_depth);
+ const int g =
+ ScaleDown(src1[2 * uv_w + 0], src1[2 * uv_w + 1], src2[2 * uv_w + 0],
+ src2[2 * uv_w + 1], rgb_bit_depth);
+ const int b =
+ ScaleDown(src1[4 * uv_w + 0], src1[4 * uv_w + 1], src2[4 * uv_w + 0],
+ src2[4 * uv_w + 1], rgb_bit_depth);
+ const int W = RGBToGray(r, g, b);
+ dst[0 * uv_w] = (fixed_t)(r - W);
+ dst[1 * uv_w] = (fixed_t)(g - W);
+ dst[2 * uv_w] = (fixed_t)(b - W);
+ dst += 1;
+ src1 += 2;
+ src2 += 2;
+ }
+}
+
+static void StoreGray(const fixed_y_t* rgb, fixed_y_t* y, int w) {
+ int i;
+ assert(w > 0);
+ for (i = 0; i < w; ++i) {
+ y[i] = RGBToGray(rgb[0 * w + i], rgb[1 * w + i], rgb[2 * w + i]);
+ }
+}
+
+//------------------------------------------------------------------------------
+
+static WEBP_INLINE fixed_y_t Filter2(int A, int B, int W0, int bit_depth) {
+ const int v0 = (A * 3 + B + 2) >> 2;
+ return clip_bit_depth(v0 + W0, bit_depth);
+}
+
+//------------------------------------------------------------------------------
+
+static WEBP_INLINE int Shift(int v, int shift) {
+ return (shift >= 0) ? (v << shift) : (v >> -shift);
+}
+
+static void ImportOneRow(const uint8_t* const r_ptr,
+ const uint8_t* const g_ptr,
+ const uint8_t* const b_ptr,
+ int rgb_step,
+ int rgb_bit_depth,
+ int pic_width,
+ fixed_y_t* const dst) {
+ // Convert the rgb_step from a number of bytes to a number of uint8_t or
+ // uint16_t values depending the bit depth.
+ const int step = (rgb_bit_depth > 8) ? rgb_step / 2 : rgb_step;
+ int i;
+ const int w = (pic_width + 1) & ~1;
+ for (i = 0; i < pic_width; ++i) {
+ const int off = i * step;
+ const int shift = GetPrecisionShift(rgb_bit_depth);
+ if (rgb_bit_depth == 8) {
+ dst[i + 0 * w] = Shift(r_ptr[off], shift);
+ dst[i + 1 * w] = Shift(g_ptr[off], shift);
+ dst[i + 2 * w] = Shift(b_ptr[off], shift);
+ } else {
+ dst[i + 0 * w] = Shift(((uint16_t*)r_ptr)[off], shift);
+ dst[i + 1 * w] = Shift(((uint16_t*)g_ptr)[off], shift);
+ dst[i + 2 * w] = Shift(((uint16_t*)b_ptr)[off], shift);
+ }
+ }
+ if (pic_width & 1) { // replicate rightmost pixel
+ dst[pic_width + 0 * w] = dst[pic_width + 0 * w - 1];
+ dst[pic_width + 1 * w] = dst[pic_width + 1 * w - 1];
+ dst[pic_width + 2 * w] = dst[pic_width + 2 * w - 1];
+ }
+}
+
+static void InterpolateTwoRows(const fixed_y_t* const best_y,
+ const fixed_t* prev_uv,
+ const fixed_t* cur_uv,
+ const fixed_t* next_uv,
+ int w,
+ fixed_y_t* out1,
+ fixed_y_t* out2,
+ int rgb_bit_depth) {
+ const int uv_w = w >> 1;
+ const int len = (w - 1) >> 1; // length to filter
+ int k = 3;
+ const int bit_depth = rgb_bit_depth + GetPrecisionShift(rgb_bit_depth);
+ while (k-- > 0) { // process each R/G/B segments in turn
+ // special boundary case for i==0
+ out1[0] = Filter2(cur_uv[0], prev_uv[0], best_y[0], bit_depth);
+ out2[0] = Filter2(cur_uv[0], next_uv[0], best_y[w], bit_depth);
+
+ SharpYuvFilterRow(cur_uv, prev_uv, len, best_y + 0 + 1, out1 + 1,
+ bit_depth);
+ SharpYuvFilterRow(cur_uv, next_uv, len, best_y + w + 1, out2 + 1,
+ bit_depth);
+
+ // special boundary case for i == w - 1 when w is even
+ if (!(w & 1)) {
+ out1[w - 1] = Filter2(cur_uv[uv_w - 1], prev_uv[uv_w - 1],
+ best_y[w - 1 + 0], bit_depth);
+ out2[w - 1] = Filter2(cur_uv[uv_w - 1], next_uv[uv_w - 1],
+ best_y[w - 1 + w], bit_depth);
+ }
+ out1 += w;
+ out2 += w;
+ prev_uv += uv_w;
+ cur_uv += uv_w;
+ next_uv += uv_w;
+ }
+}
+
+static WEBP_INLINE int RGBToYUVComponent(int r, int g, int b,
+ const int coeffs[4], int sfix) {
+ const int srounder = 1 << (YUV_FIX + sfix - 1);
+ const int luma = coeffs[0] * r + coeffs[1] * g + coeffs[2] * b +
+ coeffs[3] + srounder;
+ return (luma >> (YUV_FIX + sfix));
+}
+
+static int ConvertWRGBToYUV(const fixed_y_t* best_y, const fixed_t* best_uv,
+ uint8_t* y_ptr, int y_stride, uint8_t* u_ptr,
+ int u_stride, uint8_t* v_ptr, int v_stride,
+ int rgb_bit_depth,
+ int yuv_bit_depth, int width, int height,
+ const SharpYuvConversionMatrix* yuv_matrix) {
+ int i, j;
+ const fixed_t* const best_uv_base = best_uv;
+ const int w = (width + 1) & ~1;
+ const int h = (height + 1) & ~1;
+ const int uv_w = w >> 1;
+ const int uv_h = h >> 1;
+ const int sfix = GetPrecisionShift(rgb_bit_depth);
+ const int yuv_max = (1 << yuv_bit_depth) - 1;
+
+ for (best_uv = best_uv_base, j = 0; j < height; ++j) {
+ for (i = 0; i < width; ++i) {
+ const int off = (i >> 1);
+ const int W = best_y[i];
+ const int r = best_uv[off + 0 * uv_w] + W;
+ const int g = best_uv[off + 1 * uv_w] + W;
+ const int b = best_uv[off + 2 * uv_w] + W;
+ const int y = RGBToYUVComponent(r, g, b, yuv_matrix->rgb_to_y, sfix);
+ if (yuv_bit_depth <= 8) {
+ y_ptr[i] = clip_8b(y);
+ } else {
+ ((uint16_t*)y_ptr)[i] = clip(y, yuv_max);
+ }
+ }
+ best_y += w;
+ best_uv += (j & 1) * 3 * uv_w;
+ y_ptr += y_stride;
+ }
+ for (best_uv = best_uv_base, j = 0; j < uv_h; ++j) {
+ for (i = 0; i < uv_w; ++i) {
+ const int off = i;
+ // Note r, g and b values here are off by W, but a constant offset on all
+ // 3 components doesn't change the value of u and v with a YCbCr matrix.
+ const int r = best_uv[off + 0 * uv_w];
+ const int g = best_uv[off + 1 * uv_w];
+ const int b = best_uv[off + 2 * uv_w];
+ const int u = RGBToYUVComponent(r, g, b, yuv_matrix->rgb_to_u, sfix);
+ const int v = RGBToYUVComponent(r, g, b, yuv_matrix->rgb_to_v, sfix);
+ if (yuv_bit_depth <= 8) {
+ u_ptr[i] = clip_8b(u);
+ v_ptr[i] = clip_8b(v);
+ } else {
+ ((uint16_t*)u_ptr)[i] = clip(u, yuv_max);
+ ((uint16_t*)v_ptr)[i] = clip(v, yuv_max);
+ }
+ }
+ best_uv += 3 * uv_w;
+ u_ptr += u_stride;
+ v_ptr += v_stride;
+ }
+ return 1;
+}
+
+//------------------------------------------------------------------------------
+// Main function
+
+static void* SafeMalloc(uint64_t nmemb, size_t size) {
+ const uint64_t total_size = nmemb * (uint64_t)size;
+ if (total_size != (size_t)total_size) return NULL;
+ return malloc((size_t)total_size);
+}
+
+#define SAFE_ALLOC(W, H, T) ((T*)SafeMalloc((W) * (H), sizeof(T)))
+
+static int DoSharpArgbToYuv(const uint8_t* r_ptr, const uint8_t* g_ptr,
+ const uint8_t* b_ptr, int rgb_step, int rgb_stride,
+ int rgb_bit_depth, uint8_t* y_ptr, int y_stride,
+ uint8_t* u_ptr, int u_stride, uint8_t* v_ptr,
+ int v_stride, int yuv_bit_depth, int width,
+ int height,
+ const SharpYuvConversionMatrix* yuv_matrix) {
+ // we expand the right/bottom border if needed
+ const int w = (width + 1) & ~1;
+ const int h = (height + 1) & ~1;
+ const int uv_w = w >> 1;
+ const int uv_h = h >> 1;
+ uint64_t prev_diff_y_sum = ~0;
+ int j, iter;
+
+ // TODO(skal): allocate one big memory chunk. But for now, it's easier
+ // for valgrind debugging to have several chunks.
+ fixed_y_t* const tmp_buffer = SAFE_ALLOC(w * 3, 2, fixed_y_t); // scratch
+ fixed_y_t* const best_y_base = SAFE_ALLOC(w, h, fixed_y_t);
+ fixed_y_t* const target_y_base = SAFE_ALLOC(w, h, fixed_y_t);
+ fixed_y_t* const best_rgb_y = SAFE_ALLOC(w, 2, fixed_y_t);
+ fixed_t* const best_uv_base = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t);
+ fixed_t* const target_uv_base = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t);
+ fixed_t* const best_rgb_uv = SAFE_ALLOC(uv_w * 3, 1, fixed_t);
+ fixed_y_t* best_y = best_y_base;
+ fixed_y_t* target_y = target_y_base;
+ fixed_t* best_uv = best_uv_base;
+ fixed_t* target_uv = target_uv_base;
+ const uint64_t diff_y_threshold = (uint64_t)(3.0 * w * h);
+ int ok;
+ assert(w > 0);
+ assert(h > 0);
+
+ if (best_y_base == NULL || best_uv_base == NULL ||
+ target_y_base == NULL || target_uv_base == NULL ||
+ best_rgb_y == NULL || best_rgb_uv == NULL ||
+ tmp_buffer == NULL) {
+ ok = 0;
+ goto End;
+ }
+
+ // Import RGB samples to W/RGB representation.
+ for (j = 0; j < height; j += 2) {
+ const int is_last_row = (j == height - 1);
+ fixed_y_t* const src1 = tmp_buffer + 0 * w;
+ fixed_y_t* const src2 = tmp_buffer + 3 * w;
+
+ // prepare two rows of input
+ ImportOneRow(r_ptr, g_ptr, b_ptr, rgb_step, rgb_bit_depth, width,
+ src1);
+ if (!is_last_row) {
+ ImportOneRow(r_ptr + rgb_stride, g_ptr + rgb_stride, b_ptr + rgb_stride,
+ rgb_step, rgb_bit_depth, width, src2);
+ } else {
+ memcpy(src2, src1, 3 * w * sizeof(*src2));
+ }
+ StoreGray(src1, best_y + 0, w);
+ StoreGray(src2, best_y + w, w);
+
+ UpdateW(src1, target_y, w, rgb_bit_depth);
+ UpdateW(src2, target_y + w, w, rgb_bit_depth);
+ UpdateChroma(src1, src2, target_uv, uv_w, rgb_bit_depth);
+ memcpy(best_uv, target_uv, 3 * uv_w * sizeof(*best_uv));
+ best_y += 2 * w;
+ best_uv += 3 * uv_w;
+ target_y += 2 * w;
+ target_uv += 3 * uv_w;
+ r_ptr += 2 * rgb_stride;
+ g_ptr += 2 * rgb_stride;
+ b_ptr += 2 * rgb_stride;
+ }
+
+ // Iterate and resolve clipping conflicts.
+ for (iter = 0; iter < kNumIterations; ++iter) {
+ const fixed_t* cur_uv = best_uv_base;
+ const fixed_t* prev_uv = best_uv_base;
+ uint64_t diff_y_sum = 0;
+
+ best_y = best_y_base;
+ best_uv = best_uv_base;
+ target_y = target_y_base;
+ target_uv = target_uv_base;
+ for (j = 0; j < h; j += 2) {
+ fixed_y_t* const src1 = tmp_buffer + 0 * w;
+ fixed_y_t* const src2 = tmp_buffer + 3 * w;
+ {
+ const fixed_t* const next_uv = cur_uv + ((j < h - 2) ? 3 * uv_w : 0);
+ InterpolateTwoRows(best_y, prev_uv, cur_uv, next_uv, w,
+ src1, src2, rgb_bit_depth);
+ prev_uv = cur_uv;
+ cur_uv = next_uv;
+ }
+
+ UpdateW(src1, best_rgb_y + 0 * w, w, rgb_bit_depth);
+ UpdateW(src2, best_rgb_y + 1 * w, w, rgb_bit_depth);
+ UpdateChroma(src1, src2, best_rgb_uv, uv_w, rgb_bit_depth);
+
+ // update two rows of Y and one row of RGB
+ diff_y_sum +=
+ SharpYuvUpdateY(target_y, best_rgb_y, best_y, 2 * w,
+ rgb_bit_depth + GetPrecisionShift(rgb_bit_depth));
+ SharpYuvUpdateRGB(target_uv, best_rgb_uv, best_uv, 3 * uv_w);
+
+ best_y += 2 * w;
+ best_uv += 3 * uv_w;
+ target_y += 2 * w;
+ target_uv += 3 * uv_w;
+ }
+ // test exit condition
+ if (iter > 0) {
+ if (diff_y_sum < diff_y_threshold) break;
+ if (diff_y_sum > prev_diff_y_sum) break;
+ }
+ prev_diff_y_sum = diff_y_sum;
+ }
+
+ // final reconstruction
+ ok = ConvertWRGBToYUV(best_y_base, best_uv_base, y_ptr, y_stride, u_ptr,
+ u_stride, v_ptr, v_stride, rgb_bit_depth, yuv_bit_depth,
+ width, height, yuv_matrix);
+
+ End:
+ free(best_y_base);
+ free(best_uv_base);
+ free(target_y_base);
+ free(target_uv_base);
+ free(best_rgb_y);
+ free(best_rgb_uv);
+ free(tmp_buffer);
+ return ok;
+}
+#undef SAFE_ALLOC
+
+// Hidden exported init function.
+// By default SharpYuvConvert calls it with NULL. If needed, users can declare
+// it as extern and call it with a VP8CPUInfo function.
+extern void SharpYuvInit(VP8CPUInfo cpu_info_func);
+void SharpYuvInit(VP8CPUInfo cpu_info_func) {
+ static volatile VP8CPUInfo sharpyuv_last_cpuinfo_used =
+ (VP8CPUInfo)&sharpyuv_last_cpuinfo_used;
+ const int initialized =
+ (sharpyuv_last_cpuinfo_used != (VP8CPUInfo)&sharpyuv_last_cpuinfo_used);
+ if (cpu_info_func == NULL && initialized) return;
+ if (sharpyuv_last_cpuinfo_used == cpu_info_func) return;
+
+ SharpYuvInitDsp(cpu_info_func);
+ if (!initialized) {
+ SharpYuvInitGammaTables();
+ }
+
+ sharpyuv_last_cpuinfo_used = cpu_info_func;
+}
+
+int SharpYuvConvert(const void* r_ptr, const void* g_ptr,
+ const void* b_ptr, int rgb_step, int rgb_stride,
+ int rgb_bit_depth, void* y_ptr, int y_stride,
+ void* u_ptr, int u_stride, void* v_ptr,
+ int v_stride, int yuv_bit_depth, int width,
+ int height, const SharpYuvConversionMatrix* yuv_matrix) {
+ SharpYuvConversionMatrix scaled_matrix;
+ const int rgb_max = (1 << rgb_bit_depth) - 1;
+ const int rgb_round = 1 << (rgb_bit_depth - 1);
+ const int yuv_max = (1 << yuv_bit_depth) - 1;
+ const int sfix = GetPrecisionShift(rgb_bit_depth);
+
+ if (width < 1 || height < 1 || width == INT_MAX || height == INT_MAX ||
+ r_ptr == NULL || g_ptr == NULL || b_ptr == NULL || y_ptr == NULL ||
+ u_ptr == NULL || v_ptr == NULL) {
+ return 0;
+ }
+ if (rgb_bit_depth != 8 && rgb_bit_depth != 10 && rgb_bit_depth != 12 &&
+ rgb_bit_depth != 16) {
+ return 0;
+ }
+ if (yuv_bit_depth != 8 && yuv_bit_depth != 10 && yuv_bit_depth != 12) {
+ return 0;
+ }
+ if (rgb_bit_depth > 8 && (rgb_step % 2 != 0 || rgb_stride %2 != 0)) {
+ // Step/stride should be even for uint16_t buffers.
+ return 0;
+ }
+ if (yuv_bit_depth > 8 &&
+ (y_stride % 2 != 0 || u_stride % 2 != 0 || v_stride % 2 != 0)) {
+ // Stride should be even for uint16_t buffers.
+ return 0;
+ }
+ SharpYuvInit(NULL);
+
+ // Add scaling factor to go from rgb_bit_depth to yuv_bit_depth, to the
+ // rgb->yuv conversion matrix.
+ if (rgb_bit_depth == yuv_bit_depth) {
+ memcpy(&scaled_matrix, yuv_matrix, sizeof(scaled_matrix));
+ } else {
+ int i;
+ for (i = 0; i < 3; ++i) {
+ scaled_matrix.rgb_to_y[i] =
+ (yuv_matrix->rgb_to_y[i] * yuv_max + rgb_round) / rgb_max;
+ scaled_matrix.rgb_to_u[i] =
+ (yuv_matrix->rgb_to_u[i] * yuv_max + rgb_round) / rgb_max;
+ scaled_matrix.rgb_to_v[i] =
+ (yuv_matrix->rgb_to_v[i] * yuv_max + rgb_round) / rgb_max;
+ }
+ }
+ // Also incorporate precision change scaling.
+ scaled_matrix.rgb_to_y[3] = Shift(yuv_matrix->rgb_to_y[3], sfix);
+ scaled_matrix.rgb_to_u[3] = Shift(yuv_matrix->rgb_to_u[3], sfix);
+ scaled_matrix.rgb_to_v[3] = Shift(yuv_matrix->rgb_to_v[3], sfix);
+
+ return DoSharpArgbToYuv(r_ptr, g_ptr, b_ptr, rgb_step, rgb_stride,
+ rgb_bit_depth, y_ptr, y_stride, u_ptr, u_stride,
+ v_ptr, v_stride, yuv_bit_depth, width, height,
+ &scaled_matrix);
+}
+
+//------------------------------------------------------------------------------