summaryrefslogtreecommitdiffstats
path: root/third_party/jpeg-xl/lib/jxl/convolve.h
blob: 2fcd2d0980dcb97fef3848a6e50d0b44763df7d2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#ifndef LIB_JXL_CONVOLVE_H_
#define LIB_JXL_CONVOLVE_H_

// 2D convolution.

#include <stddef.h>
#include <stdint.h>

#include "lib/jxl/base/data_parallel.h"
#include "lib/jxl/image.h"

namespace jxl {

// No valid values outside [0, xsize), but the strategy may still safely load
// the preceding vector, and/or round xsize up to the vector lane count. This
// avoids needing PadImage.
// Requires xsize >= kConvolveLanes + kConvolveMaxRadius.
static constexpr size_t kConvolveMaxRadius = 3;

// Weights must already be normalized.

struct WeightsSymmetric3 {
  // d r d (each replicated 4x)
  // r c r
  // d r d
  float c[4];
  float r[4];
  float d[4];
};

struct WeightsSymmetric5 {
  // The lower-right quadrant is: c r R  (each replicated 4x)
  //                              r d L
  //                              R L D
  float c[4];
  float r[4];
  float R[4];
  float d[4];
  float D[4];
  float L[4];
};

// Weights for separable 5x5 filters (typically but not necessarily the same
// values for horizontal and vertical directions). The kernel must already be
// normalized, but note that values for negative offsets are omitted, so the
// given values do not sum to 1.
struct WeightsSeparable5 {
  // Horizontal 1D, distances 0..2 (each replicated 4x)
  float horz[3 * 4];
  float vert[3 * 4];
};

// Weights for separable 7x7 filters (typically but not necessarily the same
// values for horizontal and vertical directions). The kernel must already be
// normalized, but note that values for negative offsets are omitted, so the
// given values do not sum to 1.
//
// NOTE: for >= 7x7 Gaussian kernels, it is faster to use FastGaussian instead,
// at least when images exceed the L1 cache size.
struct WeightsSeparable7 {
  // Horizontal 1D, distances 0..3 (each replicated 4x)
  float horz[4 * 4];
  float vert[4 * 4];
};

const WeightsSymmetric3& WeightsSymmetric3Lowpass();
const WeightsSeparable5& WeightsSeparable5Lowpass();
const WeightsSymmetric5& WeightsSymmetric5Lowpass();

void SlowSymmetric3(const ImageF& in, const Rect& rect,
                    const WeightsSymmetric3& weights, ThreadPool* pool,
                    ImageF* JXL_RESTRICT out);

void SlowSeparable5(const ImageF& in, const Rect& rect,
                    const WeightsSeparable5& weights, ThreadPool* pool,
                    ImageF* out);

void SlowSeparable7(const ImageF& in, const Rect& rect,
                    const WeightsSeparable7& weights, ThreadPool* pool,
                    ImageF* out);

void Symmetric3(const ImageF& in, const Rect& rect,
                const WeightsSymmetric3& weights, ThreadPool* pool,
                ImageF* out);

void Symmetric5(const ImageF& in, const Rect& rect,
                const WeightsSymmetric5& weights, ThreadPool* pool,
                ImageF* JXL_RESTRICT out);

void Separable5(const ImageF& in, const Rect& rect,
                const WeightsSeparable5& weights, ThreadPool* pool,
                ImageF* out);

void Separable7(const ImageF& in, const Rect& rect,
                const WeightsSeparable7& weights, ThreadPool* pool,
                ImageF* out);

}  // namespace jxl

#endif  // LIB_JXL_CONVOLVE_H_