summaryrefslogtreecommitdiffstats
path: root/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_chroma_upsampling.cc
blob: 936fbd3a4427e163f84ac95d59e29f2772303b13 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "lib/jxl/render_pipeline/stage_chroma_upsampling.h"

#undef HWY_TARGET_INCLUDE
#define HWY_TARGET_INCLUDE "lib/jxl/render_pipeline/stage_chroma_upsampling.cc"
#include <hwy/foreach_target.h>
#include <hwy/highway.h>

#include "lib/jxl/simd_util-inl.h"

HWY_BEFORE_NAMESPACE();
namespace jxl {
namespace HWY_NAMESPACE {

// These templates are not found via ADL.
using hwy::HWY_NAMESPACE::Mul;
using hwy::HWY_NAMESPACE::MulAdd;

class HorizontalChromaUpsamplingStage : public RenderPipelineStage {
 public:
  explicit HorizontalChromaUpsamplingStage(size_t channel)
      : RenderPipelineStage(RenderPipelineStage::Settings::ShiftX(
            /*shift=*/1, /*border=*/1)),
        c_(channel) {}

  void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows,
                  size_t xextra, size_t xsize, size_t xpos, size_t ypos,
                  size_t thread_id) const final {
    HWY_FULL(float) df;
    xextra = RoundUpTo(xextra, Lanes(df));
    auto threefour = Set(df, 0.75f);
    auto onefour = Set(df, 0.25f);
    const float* row_in = GetInputRow(input_rows, c_, 0);
    float* row_out = GetOutputRow(output_rows, c_, 0);
    for (ssize_t x = -xextra; x < static_cast<ssize_t>(xsize + xextra);
         x += Lanes(df)) {
      auto current = Mul(LoadU(df, row_in + x), threefour);
      auto prev = LoadU(df, row_in + x - 1);
      auto next = LoadU(df, row_in + x + 1);
      auto left = MulAdd(onefour, prev, current);
      auto right = MulAdd(onefour, next, current);
      StoreInterleaved(df, left, right, row_out + x * 2);
    }
  }

  RenderPipelineChannelMode GetChannelMode(size_t c) const final {
    return c == c_ ? RenderPipelineChannelMode::kInOut
                   : RenderPipelineChannelMode::kIgnored;
  }

  const char* GetName() const override { return "HChromaUps"; }

 private:
  size_t c_;
};

class VerticalChromaUpsamplingStage : public RenderPipelineStage {
 public:
  explicit VerticalChromaUpsamplingStage(size_t channel)
      : RenderPipelineStage(RenderPipelineStage::Settings::ShiftY(
            /*shift=*/1, /*border=*/1)),
        c_(channel) {}

  void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows,
                  size_t xextra, size_t xsize, size_t xpos, size_t ypos,
                  size_t thread_id) const final {
    HWY_FULL(float) df;
    xextra = RoundUpTo(xextra, Lanes(df));
    auto threefour = Set(df, 0.75f);
    auto onefour = Set(df, 0.25f);
    const float* row_top = GetInputRow(input_rows, c_, -1);
    const float* row_mid = GetInputRow(input_rows, c_, 0);
    const float* row_bot = GetInputRow(input_rows, c_, 1);
    float* row_out0 = GetOutputRow(output_rows, c_, 0);
    float* row_out1 = GetOutputRow(output_rows, c_, 1);
    for (ssize_t x = -xextra; x < static_cast<ssize_t>(xsize + xextra);
         x += Lanes(df)) {
      auto it = LoadU(df, row_top + x);
      auto im = LoadU(df, row_mid + x);
      auto ib = LoadU(df, row_bot + x);
      auto im_scaled = Mul(im, threefour);
      Store(MulAdd(it, onefour, im_scaled), df, row_out0 + x);
      Store(MulAdd(ib, onefour, im_scaled), df, row_out1 + x);
    }
  }

  RenderPipelineChannelMode GetChannelMode(size_t c) const final {
    return c == c_ ? RenderPipelineChannelMode::kInOut
                   : RenderPipelineChannelMode::kIgnored;
  }

  const char* GetName() const override { return "VChromaUps"; }

 private:
  size_t c_;
};

// Per-target factory: creates the chroma upsampling stage for `channel`.
// Returns a HorizontalChromaUpsamplingStage when `horizontal` is true and a
// VerticalChromaUpsamplingStage otherwise.
std::unique_ptr<RenderPipelineStage> GetChromaUpsamplingStage(size_t channel,
                                                              bool horizontal) {
  if (!horizontal) {
    return jxl::make_unique<VerticalChromaUpsamplingStage>(channel);
  }
  return jxl::make_unique<HorizontalChromaUpsamplingStage>(channel);
}

// NOLINTNEXTLINE(google-readability-namespace-comments)
}  // namespace HWY_NAMESPACE
}  // namespace jxl
HWY_AFTER_NAMESPACE();

#if HWY_ONCE
namespace jxl {

// Exports the per-target GetChromaUpsamplingStage implementations (compiled
// above, once per HWY_NAMESPACE) so that HWY_DYNAMIC_DISPATCH can select one.
HWY_EXPORT(GetChromaUpsamplingStage);

// Public entry point: forwards `channel` and `horizontal` unchanged to the
// GetChromaUpsamplingStage implementation picked by HWY_DYNAMIC_DISPATCH and
// returns the stage it creates (horizontal upsampling when `horizontal` is
// true, vertical otherwise).
std::unique_ptr<RenderPipelineStage> GetChromaUpsamplingStage(size_t channel,
                                                              bool horizontal) {
  return HWY_DYNAMIC_DISPATCH(GetChromaUpsamplingStage)(channel, horizontal);
}

}  // namespace jxl
#endif