From 36d22d82aa202bb199967e9512281e9a53db42c9 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 7 Apr 2024 21:33:14 +0200 Subject: Adding upstream version 115.7.0esr. Signed-off-by: Daniel Baumann --- .../jpeg-xl/lib/jxl/render_pipeline/stage_xyb.cc | 176 +++++++++++++++++++++ 1 file changed, 176 insertions(+) create mode 100644 third_party/jpeg-xl/lib/jxl/render_pipeline/stage_xyb.cc (limited to 'third_party/jpeg-xl/lib/jxl/render_pipeline/stage_xyb.cc') diff --git a/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_xyb.cc b/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_xyb.cc new file mode 100644 index 0000000000..15cfc75b18 --- /dev/null +++ b/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_xyb.cc @@ -0,0 +1,176 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/render_pipeline/stage_xyb.h" + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jxl/render_pipeline/stage_xyb.cc" +#include +#include + +#include "lib/jxl/dec_xyb-inl.h" +#include "lib/jxl/opsin_params.h" +#include "lib/jxl/sanitizers.h" + +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { + +class XYBStage : public RenderPipelineStage { + public: + explicit XYBStage(const OutputEncodingInfo& output_encoding_info) + : RenderPipelineStage(RenderPipelineStage::Settings()), + opsin_params_(output_encoding_info.opsin_params), + output_is_xyb_(output_encoding_info.color_encoding.GetColorSpace() == + ColorSpace::kXYB) {} + + void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows, + size_t xextra, size_t xsize, size_t xpos, size_t ypos, + size_t thread_id) const final { + PROFILER_ZONE("UndoXYB"); + + const HWY_FULL(float) d; + JXL_ASSERT(xextra == 0); + const size_t xsize_v = RoundUpTo(xsize, Lanes(d)); + float* JXL_RESTRICT row0 = GetInputRow(input_rows, 0, 0); + float* JXL_RESTRICT row1 = GetInputRow(input_rows, 1, 0); + float* JXL_RESTRICT row2 = GetInputRow(input_rows, 2, 0); + // All calculations are lane-wise, still some might require + // value-dependent behaviour (e.g. NearestInt). Temporary unpoison last + // vector tail. + msan::UnpoisonMemory(row0 + xsize, sizeof(float) * (xsize_v - xsize)); + msan::UnpoisonMemory(row1 + xsize, sizeof(float) * (xsize_v - xsize)); + msan::UnpoisonMemory(row2 + xsize, sizeof(float) * (xsize_v - xsize)); + // TODO(eustas): when using frame origin, addresses might be unaligned; + // making them aligned will void performance penalty. + if (output_is_xyb_) { + const auto scale_x = Set(d, kScaledXYBScale[0]); + const auto scale_y = Set(d, kScaledXYBScale[1]); + const auto scale_bmy = Set(d, kScaledXYBScale[2]); + const auto offset_x = Set(d, kScaledXYBOffset[0]); + const auto offset_y = Set(d, kScaledXYBOffset[1]); + const auto offset_bmy = Set(d, kScaledXYBOffset[2]); + for (ssize_t x = -xextra; x < (ssize_t)(xsize + xextra); x += Lanes(d)) { + const auto in_x = LoadU(d, row0 + x); + const auto in_y = LoadU(d, row1 + x); + const auto in_b = LoadU(d, row2 + x); + auto out_x = Mul(Add(in_x, offset_x), scale_x); + auto out_y = Mul(Add(in_y, offset_y), scale_y); + auto out_b = Mul(Add(Sub(in_b, in_y), offset_bmy), scale_bmy); + StoreU(out_x, d, row0 + x); + StoreU(out_y, d, row1 + x); + StoreU(out_b, d, row2 + x); + } + } else { + for (ssize_t x = -xextra; x < (ssize_t)(xsize + xextra); x += Lanes(d)) { + const auto in_opsin_x = LoadU(d, row0 + x); + const auto in_opsin_y = LoadU(d, row1 + x); + const auto in_opsin_b = LoadU(d, row2 + x); + auto r = Undefined(d); + auto g = Undefined(d); + auto b = Undefined(d); + XybToRgb(d, in_opsin_x, in_opsin_y, in_opsin_b, opsin_params_, &r, &g, + &b); + StoreU(r, d, row0 + x); + StoreU(g, d, row1 + x); + StoreU(b, d, row2 + x); + } + } + msan::PoisonMemory(row0 + xsize, sizeof(float) * (xsize_v - xsize)); + msan::PoisonMemory(row1 + xsize, sizeof(float) * (xsize_v - xsize)); + msan::PoisonMemory(row2 + xsize, sizeof(float) * (xsize_v - xsize)); + } + + RenderPipelineChannelMode GetChannelMode(size_t c) const final { + return c < 3 ? RenderPipelineChannelMode::kInPlace + : RenderPipelineChannelMode::kIgnored; + } + + const char* GetName() const override { return "XYB"; } + + private: + const OpsinParams opsin_params_; + const bool output_is_xyb_; +}; + +std::unique_ptr GetXYBStage( + const OutputEncodingInfo& output_encoding_info) { + return jxl::make_unique(output_encoding_info); +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jxl { + +HWY_EXPORT(GetXYBStage); + +std::unique_ptr GetXYBStage( + const OutputEncodingInfo& output_encoding_info) { + return HWY_DYNAMIC_DISPATCH(GetXYBStage)(output_encoding_info); +} + +namespace { +class FastXYBStage : public RenderPipelineStage { + public: + FastXYBStage(uint8_t* rgb, size_t stride, size_t width, size_t height, + bool rgba, bool has_alpha, size_t alpha_c) + : RenderPipelineStage(RenderPipelineStage::Settings()), + rgb_(rgb), + stride_(stride), + width_(width), + height_(height), + rgba_(rgba), + has_alpha_(has_alpha), + alpha_c_(alpha_c) {} + + void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows, + size_t xextra, size_t xsize, size_t xpos, size_t ypos, + size_t thread_id) const final { + if (ypos >= height_) return; + JXL_ASSERT(xextra == 0); + const float* xyba[4] = { + GetInputRow(input_rows, 0, 0), GetInputRow(input_rows, 1, 0), + GetInputRow(input_rows, 2, 0), + has_alpha_ ? GetInputRow(input_rows, alpha_c_, 0) : nullptr}; + uint8_t* out_buf = rgb_ + stride_ * ypos + (rgba_ ? 4 : 3) * xpos; + FastXYBTosRGB8(xyba, out_buf, rgba_, + xsize + xpos <= width_ ? xsize : width_ - xpos); + } + + RenderPipelineChannelMode GetChannelMode(size_t c) const final { + return c < 3 || (has_alpha_ && c == alpha_c_) + ? RenderPipelineChannelMode::kInput + : RenderPipelineChannelMode::kIgnored; + } + + const char* GetName() const override { return "FastXYB"; } + + private: + uint8_t* rgb_; + size_t stride_; + size_t width_; + size_t height_; + bool rgba_; + bool has_alpha_; + size_t alpha_c_; + std::vector opaque_alpha_; +}; + +} // namespace + +std::unique_ptr GetFastXYBTosRGB8Stage( + uint8_t* rgb, size_t stride, size_t width, size_t height, bool rgba, + bool has_alpha, size_t alpha_c) { + JXL_ASSERT(HasFastXYBTosRGB8()); + return make_unique(rgb, stride, width, height, rgba, has_alpha, + alpha_c); +} + +} // namespace jxl +#endif -- cgit v1.2.3