diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
commit | 26a029d407be480d791972afb5975cf62c9360a6 (patch) | |
tree | f435a8308119effd964b339f76abb83a57c29483 /third_party/jpeg-xl/lib/jxl/convolve_symmetric5.cc | |
parent | Initial commit. (diff) | |
download | firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz firefox-26a029d407be480d791972afb5975cf62c9360a6.zip |
Adding upstream version 124.0.1.upstream/124.0.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/jpeg-xl/lib/jxl/convolve_symmetric5.cc')
-rw-r--r-- | third_party/jpeg-xl/lib/jxl/convolve_symmetric5.cc | 189 |
1 files changed, 189 insertions, 0 deletions
diff --git a/third_party/jpeg-xl/lib/jxl/convolve_symmetric5.cc b/third_party/jpeg-xl/lib/jxl/convolve_symmetric5.cc new file mode 100644 index 0000000000..2e203fd08f --- /dev/null +++ b/third_party/jpeg-xl/lib/jxl/convolve_symmetric5.cc @@ -0,0 +1,189 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/convolve.h" + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jxl/convolve_symmetric5.cc" +#include <hwy/foreach_target.h> +#include <hwy/highway.h> + +#include "lib/jxl/base/common.h" +#include "lib/jxl/convolve-inl.h" + +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { + +// These templates are not found via ADL. +using hwy::HWY_NAMESPACE::Add; +using hwy::HWY_NAMESPACE::Mul; +using hwy::HWY_NAMESPACE::Vec; + +// Weighted sum of 1x5 pixels around ix, iy with [wx2 wx1 wx0 wx1 wx2]. +template <class WrapY> +static float WeightedSumBorder(const ImageF& in, const WrapY wrap_y, + const int64_t ix, const int64_t iy, + const size_t xsize, const size_t ysize, + const float wx0, const float wx1, + const float wx2) { + const WrapMirror wrap_x; + const float* JXL_RESTRICT row = in.ConstRow(wrap_y(iy, ysize)); + const float in_m2 = row[wrap_x(ix - 2, xsize)]; + const float in_p2 = row[wrap_x(ix + 2, xsize)]; + const float in_m1 = row[wrap_x(ix - 1, xsize)]; + const float in_p1 = row[wrap_x(ix + 1, xsize)]; + const float in_00 = row[ix]; + const float sum_2 = wx2 * (in_m2 + in_p2); + const float sum_1 = wx1 * (in_m1 + in_p1); + const float sum_0 = wx0 * in_00; + return sum_2 + (sum_1 + sum_0); +} + +template <class WrapY, class V> +static V WeightedSum(const ImageF& in, const WrapY wrap_y, const size_t ix, + const int64_t iy, const size_t ysize, const V wx0, + const V wx1, const V wx2) { + const HWY_FULL(float) d; + const float* JXL_RESTRICT center = in.ConstRow(wrap_y(iy, ysize)) + ix; + const auto in_m2 = LoadU(d, center - 2); + const auto in_p2 = LoadU(d, center + 2); + const auto in_m1 = LoadU(d, center - 1); + const auto in_p1 = LoadU(d, center + 1); + const auto in_00 = LoadU(d, center); + const auto sum_2 = Mul(wx2, Add(in_m2, in_p2)); + const auto sum_1 = Mul(wx1, Add(in_m1, in_p1)); + const auto sum_0 = Mul(wx0, in_00); + return Add(sum_2, Add(sum_1, sum_0)); +} + +// Produces result for one pixel +template <class WrapY> +float Symmetric5Border(const ImageF& in, const int64_t ix, const int64_t iy, + const WeightsSymmetric5& weights) { + const float w0 = weights.c[0]; + const float w1 = weights.r[0]; + const float w2 = weights.R[0]; + const float w4 = weights.d[0]; + const float w5 = weights.L[0]; + const float w8 = weights.D[0]; + + const size_t xsize = in.xsize(); + const size_t ysize = in.ysize(); + const WrapY wrap_y; + // Unrolled loop over all 5 rows of the kernel. + float sum0 = WeightedSumBorder(in, wrap_y, ix, iy, xsize, ysize, w0, w1, w2); + + sum0 += WeightedSumBorder(in, wrap_y, ix, iy - 2, xsize, ysize, w2, w5, w8); + float sum1 = + WeightedSumBorder(in, wrap_y, ix, iy + 2, xsize, ysize, w2, w5, w8); + + sum0 += WeightedSumBorder(in, wrap_y, ix, iy - 1, xsize, ysize, w1, w4, w5); + sum1 += WeightedSumBorder(in, wrap_y, ix, iy + 1, xsize, ysize, w1, w4, w5); + + return sum0 + sum1; +} + +// Produces result for one vector's worth of pixels +template <class WrapY> +static void Symmetric5Interior(const ImageF& in, const int64_t ix, + const int64_t rix, const int64_t iy, + const WeightsSymmetric5& weights, + float* JXL_RESTRICT row_out) { + const HWY_FULL(float) d; + + const auto w0 = LoadDup128(d, weights.c); + const auto w1 = LoadDup128(d, weights.r); + const auto w2 = LoadDup128(d, weights.R); + const auto w4 = LoadDup128(d, weights.d); + const auto w5 = LoadDup128(d, weights.L); + const auto w8 = LoadDup128(d, weights.D); + + const size_t ysize = in.ysize(); + const WrapY wrap_y; + // Unrolled loop over all 5 rows of the kernel. + auto sum0 = WeightedSum(in, wrap_y, ix, iy, ysize, w0, w1, w2); + + sum0 = Add(sum0, WeightedSum(in, wrap_y, ix, iy - 2, ysize, w2, w5, w8)); + auto sum1 = WeightedSum(in, wrap_y, ix, iy + 2, ysize, w2, w5, w8); + + sum0 = Add(sum0, WeightedSum(in, wrap_y, ix, iy - 1, ysize, w1, w4, w5)); + sum1 = Add(sum1, WeightedSum(in, wrap_y, ix, iy + 1, ysize, w1, w4, w5)); + + StoreU(Add(sum0, sum1), d, row_out + rix); +} + +template <class WrapY> +static void Symmetric5Row(const ImageF& in, const Rect& rect, const int64_t iy, + const WeightsSymmetric5& weights, + float* JXL_RESTRICT row_out) { + const int64_t kRadius = 2; + const size_t xend = rect.x1(); + + size_t rix = 0; + size_t ix = rect.x0(); + const HWY_FULL(float) d; + const size_t N = Lanes(d); + const size_t aligned_x = RoundUpTo(kRadius, N); + for (; ix < std::min(aligned_x, xend); ++ix, ++rix) { + row_out[rix] = Symmetric5Border<WrapY>(in, ix, iy, weights); + } + for (; ix + N + kRadius <= xend; ix += N, rix += N) { + Symmetric5Interior<WrapY>(in, ix, rix, iy, weights, row_out); + } + for (; ix < xend; ++ix, ++rix) { + row_out[rix] = Symmetric5Border<WrapY>(in, ix, iy, weights); + } +} + +// Semi-vectorized (interior pixels Fonly); called directly like slow::, unlike +// the fully vectorized strategies below. +void Symmetric5(const ImageF& in, const Rect& in_rect, + const WeightsSymmetric5& weights, ThreadPool* pool, + ImageF* JXL_RESTRICT out, const Rect& out_rect) { + JXL_ASSERT(in_rect.xsize() == out_rect.xsize()); + JXL_ASSERT(in_rect.ysize() == out_rect.ysize()); + const size_t ysize = in_rect.ysize(); + JXL_CHECK(RunOnPool( + pool, 0, static_cast<uint32_t>(ysize), ThreadPool::NoInit, + [&](const uint32_t task, size_t /*thread*/) { + const int64_t riy = task; + const int64_t iy = in_rect.y0() + riy; + + if (iy < 2 || iy >= static_cast<ssize_t>(in.ysize()) - 2) { + Symmetric5Row<WrapMirror>(in, in_rect, iy, weights, + out_rect.Row(out, riy)); + } else { + Symmetric5Row<WrapUnchanged>(in, in_rect, iy, weights, + out_rect.Row(out, riy)); + } + }, + "Symmetric5x5Convolution")); +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jxl { + +HWY_EXPORT(Symmetric5); +void Symmetric5(const ImageF& in, const Rect& in_rect, + const WeightsSymmetric5& weights, ThreadPool* pool, + ImageF* JXL_RESTRICT out, const Rect& out_rect) { + return HWY_DYNAMIC_DISPATCH(Symmetric5)(in, in_rect, weights, pool, out, + out_rect); +} + +void Symmetric5(const ImageF& in, const Rect& rect, + const WeightsSymmetric5& weights, ThreadPool* pool, + ImageF* JXL_RESTRICT out) { + return Symmetric5(in, rect, weights, pool, out, Rect(*out)); +} + +} // namespace jxl +#endif // HWY_ONCE |