summaryrefslogtreecommitdiffstats
path: root/third_party/jpeg-xl/lib/jxl/gauss_blur_gbench.cc
blob: b1bb64abc582f42764b6a7d5ca0a0b13a531ef31 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include <cmath>
#include <cstddef>
#include <vector>

#include <hwy/targets.h>

#include "benchmark/benchmark.h"
#include "lib/jxl/convolve.h"
#include "lib/jxl/gauss_blur.h"
#include "lib/jxl/image_ops.h"

namespace jxl {
namespace {

// Adapter used by the FIR benchmark: forwards `in` and `kernel` unchanged to
// ConvolveAndSample and returns its result.
JXL_MAYBE_UNUSED ImageF Convolve(const ImageF& in,
                                 const std::vector<float>& kernel) {
  // Third argument of ConvolveAndSample. Presumably the sampling step, so 1
  // would mean "no subsampling" - TODO confirm against its declaration.
  constexpr int kSampleStep = 1;
  return ConvolveAndSample(in, kernel, kSampleStep);
}

// Benchmarks FastGaussian1D on a single row of `state.range()` floats.
//
// The input row is filled with a constant value. After every blur the middle
// output sample is asserted to match that constant to within a relative error
// of 9E-5; the assertion also reads the output so that the blur cannot be
// optimized away. One processed item is reported per input sample.
void BM_GaussBlur1d(benchmark::State& state) {
  // Uncomment to disable SIMD and force a scalar implementation
  // hwy::DisableTargets(~HWY_SCALAR);
  // Uncomment to run AVX2
  // hwy::DisableTargets(HWY_AVX3);

  const size_t length = state.range();
  const double sigma = 7.0;  // (from Butteraugli application)
  ImageF in(length, 1);
  // The fill value is simply the row length, so it varies with the benchmark
  // argument.
  const float expected = static_cast<float>(length);
  FillImage(expected, &in);

  // FastGaussian1D only takes an input and an output row, so no scratch image
  // is allocated here.
  ImageF out(length, 1);
  // Built once, outside the timed loop.
  const auto rg = CreateRecursiveGaussian(sigma);
  for (auto _ : state) {
    FastGaussian1D(rg, in.Row(0), length, out.Row(0));
    // Prevent optimizing out
    JXL_ASSERT(std::abs(out.ConstRow(0)[length / 2] - expected) / expected <
               9E-5);
  }
  state.SetItemsProcessed(length * state.iterations());
}

// Benchmarks the 2D FastGaussian on a square image whose side length is
// `state.range()`.
//
// The image is filled with a constant and the center pixel of the result is
// asserted to match that constant (relative error below 9E-5) after every
// iteration. One processed item is reported per pixel.
void BM_GaussBlur2d(benchmark::State& state) {
  // See GaussBlur1d for SIMD changes.

  const size_t side = state.range();  // width == height
  const size_t mid = side / 2;        // coordinate of the probed center pixel
  const double sigma = 7.0;           // (from Butteraugli application)

  // Constant fill value: width + height.
  const float fill_value = side + side;
  ImageF in(side, side);
  FillImage(fill_value, &in);

  // `scratch` is handed to FastGaussian in addition to the output image.
  ImageF scratch(side, side);
  ImageF out(side, side);
  ThreadPool* const no_pool = nullptr;  // no thread pool is supplied
  const auto rg = CreateRecursiveGaussian(sigma);

  for (auto _ : state) {
    FastGaussian(rg, in, no_pool, &scratch, &out);
    // Prevent optimizing out
    const float center = out.ConstRow(mid)[mid];
    const float rel_error = std::abs(center - fill_value) / fill_value;
    JXL_ASSERT(rel_error < 9E-5);
  }

  const size_t pixels = side * side;
  state.SetItemsProcessed(pixels * state.iterations());
}

// Benchmarks an FIR Gaussian blur (the Convolve helper with a kernel from
// GaussianKernel) on a square image whose side length is `state.range()`.
//
// The image is filled with a constant and the center pixel of each result is
// asserted to match that constant (relative error below 9E-5). One processed
// item is reported per pixel.
void BM_GaussBlurFir(benchmark::State& state) {
  // See GaussBlur1d for SIMD changes.

  const size_t xsize = state.range();
  const size_t ysize = xsize;
  const double sigma = 7.0;  // (from Butteraugli application)
  ImageF in(xsize, ysize);
  const float expected = static_cast<float>(xsize + ysize);
  FillImage(expected, &in);

  // Convolve returns its result by value, so no separate output or scratch
  // image is allocated here.
  // First argument is 4 * sigma truncated to int (28); presumably the kernel
  // radius - TODO confirm against GaussianKernel's declaration.
  const std::vector<float> kernel =
      GaussianKernel(static_cast<int>(4 * sigma), static_cast<float>(sigma));
  for (auto _ : state) {
    // Prevent optimizing out
    JXL_ASSERT(std::abs(Convolve(in, kernel).ConstRow(ysize / 2)[xsize / 2] -
                        expected) /
                   expected <
               9E-5);
  }
  state.SetItemsProcessed(xsize * ysize * state.iterations());
}

// Benchmarks the Separable7 convolution on a square image whose side length
// is `state.range()`.
//
// The image is filled with a constant and the center pixel of the result is
// asserted to match that constant (relative error below 9E-5) after every
// iteration. One processed item is reported per pixel.
void BM_GaussBlurSep7(benchmark::State& state) {
  // See GaussBlur1d for SIMD changes.

  const size_t xsize = state.range();
  const size_t ysize = xsize;
  ImageF in(xsize, ysize);
  const float expected = static_cast<float>(xsize + ysize);
  FillImage(expected, &in);

  // Separable7 writes straight into `out`; no scratch image is passed to it.
  ImageF out(xsize, ysize);
  ThreadPool* null_pool = nullptr;
  // Gaussian with sigma 1
  // Two identical sets of four taps (center tap first), each tap replicated
  // with HWY_REP4. Presumably one set per filtering direction - TODO confirm
  // against the WeightsSeparable7 definition.
  const WeightsSeparable7 weights = {{HWY_REP4(0.383103f), HWY_REP4(0.241843f),
                                      HWY_REP4(0.060626f), HWY_REP4(0.00598f)},
                                     {HWY_REP4(0.383103f), HWY_REP4(0.241843f),
                                      HWY_REP4(0.060626f), HWY_REP4(0.00598f)}};
  for (auto _ : state) {
    Separable7(in, Rect(in), weights, null_pool, &out);
    // Prevent optimizing out
    JXL_ASSERT(std::abs(out.ConstRow(ysize / 2)[xsize / 2] - expected) /
                   expected <
               9E-5);
  }
  state.SetItemsProcessed(xsize * ysize * state.iterations());
}

// Bounds of the size argument each benchmark reads via state.range(). The
// intermediate sizes between the bounds are chosen by Google Benchmark's
// Range().
constexpr int kMinRowLength = 1 << 8;   // 1D: shortest row
constexpr int kMaxRowLength = 1 << 14;  // 1D: longest row
constexpr int kMinImageSide = 1 << 7;   // 2D: smallest square side
constexpr int kMaxImageSide = 1 << 10;  // 2D: largest square side

BENCHMARK(BM_GaussBlur1d)->Range(kMinRowLength, kMaxRowLength);
BENCHMARK(BM_GaussBlur2d)->Range(kMinImageSide, kMaxImageSide);
BENCHMARK(BM_GaussBlurFir)->Range(kMinImageSide, kMaxImageSide);
BENCHMARK(BM_GaussBlurSep7)->Range(kMinImageSide, kMaxImageSide);

}  // namespace
}  // namespace jxl