// Copyright (c) the JPEG XL Project Authors. All rights reserved. // // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. #include "lib/jxl/convolve.h" #include #include #include // PRIx64 #undef HWY_TARGET_INCLUDE #define HWY_TARGET_INCLUDE "lib/jxl/convolve_test.cc" #include #include #include #include #include #include "lib/jxl/base/compiler_specific.h" #include "lib/jxl/base/data_parallel.h" #include "lib/jxl/base/printf_macros.h" #include "lib/jxl/base/random.h" #include "lib/jxl/base/rect.h" #include "lib/jxl/image_ops.h" #include "lib/jxl/image_test_utils.h" #include "lib/jxl/test_utils.h" #include "lib/jxl/testing.h" #ifndef JXL_DEBUG_CONVOLVE #define JXL_DEBUG_CONVOLVE 0 #endif #include "lib/jxl/convolve-inl.h" HWY_BEFORE_NAMESPACE(); namespace jxl { namespace HWY_NAMESPACE { void TestNeighbors() { const Neighbors::D d; const Neighbors::V v = Iota(d, 0); constexpr size_t kMaxVectorSize = 64; constexpr size_t M = kMaxVectorSize / sizeof(float); HWY_ALIGN float actual[M] = {0}; HWY_ALIGN float first_l1[M] = {0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}; Store(Neighbors::FirstL1(v), d, actual); const size_t N = Lanes(d); ASSERT_LE(N, M); EXPECT_EQ(std::vector(first_l1, first_l1 + N), std::vector(actual, actual + N)); #if HWY_TARGET != HWY_SCALAR HWY_ALIGN float first_l2[M] = {1, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13}; Store(Neighbors::FirstL2(v), d, actual); EXPECT_EQ(std::vector(first_l2, first_l2 + N), std::vector(actual, actual + N)); HWY_ALIGN float first_l3[] = {2, 1, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; Store(Neighbors::FirstL3(v), d, actual); EXPECT_EQ(std::vector(first_l3, first_l3 + N), std::vector(actual, actual + N)); #endif // HWY_TARGET != HWY_SCALAR } void VerifySymmetric3(const size_t xsize, const size_t ysize, ThreadPool* pool, Rng* rng) { const Rect rect(0, 0, xsize, ysize); JXL_ASSIGN_OR_DIE(ImageF in, ImageF::Create(xsize, ysize)); GenerateImage(*rng, &in, 0.0f, 1.0f); JXL_ASSIGN_OR_DIE(ImageF out_expected, ImageF::Create(xsize, ysize)); JXL_ASSIGN_OR_DIE(ImageF out_actual, ImageF::Create(xsize, ysize)); const WeightsSymmetric3& weights = WeightsSymmetric3Lowpass(); Symmetric3(in, rect, weights, pool, &out_expected); SlowSymmetric3(in, rect, weights, pool, &out_actual); JXL_ASSERT_OK(VerifyRelativeError(out_expected, out_actual, 1E-5f, 1E-5f, _)); } std::vector GenerateTestRectangles(size_t xsize, size_t ysize) { std::vector out; for (size_t tl : {0, 1, 13}) { for (size_t br : {0, 1, 13}) { if (xsize > tl + br && ysize > tl + br) { out.emplace_back(tl, tl, xsize - tl - br, ysize - tl - br); } } } return out; } // Ensures Symmetric and Separable give the same result. void VerifySymmetric5(const size_t xsize, const size_t ysize, ThreadPool* pool, Rng* rng) { JXL_ASSIGN_OR_DIE(ImageF in, ImageF::Create(xsize, ysize)); GenerateImage(*rng, &in, 0.0f, 1.0f); for (const Rect& in_rect : GenerateTestRectangles(xsize, ysize)) { JXL_DEBUG(JXL_DEBUG_CONVOLVE, "in_rect: %" PRIuS "x%" PRIuS "+%" PRIuS ",%" PRIuS "", in_rect.xsize(), in_rect.ysize(), in_rect.x0(), in_rect.y0()); { Rect out_rect = in_rect; JXL_ASSIGN_OR_DIE(ImageF out_expected, ImageF::Create(xsize, ysize)); JXL_ASSIGN_OR_DIE(ImageF out_actual, ImageF::Create(xsize, ysize)); FillImage(-1.0f, &out_expected); FillImage(-1.0f, &out_actual); SlowSeparable5(in, in_rect, WeightsSeparable5Lowpass(), pool, &out_expected, out_rect); Symmetric5(in, in_rect, WeightsSymmetric5Lowpass(), pool, &out_actual, out_rect); JXL_ASSERT_OK( VerifyRelativeError(out_expected, out_actual, 1E-5f, 1E-5f, _)); } { Rect out_rect(0, 0, in_rect.xsize(), in_rect.ysize()); JXL_ASSIGN_OR_DIE(ImageF out_expected, ImageF::Create(out_rect.xsize(), out_rect.ysize())); JXL_ASSIGN_OR_DIE(ImageF out_actual, ImageF::Create(out_rect.xsize(), out_rect.ysize())); SlowSeparable5(in, in_rect, WeightsSeparable5Lowpass(), pool, &out_expected, out_rect); Symmetric5(in, in_rect, WeightsSymmetric5Lowpass(), pool, &out_actual, out_rect); JXL_ASSERT_OK( VerifyRelativeError(out_expected, out_actual, 1E-5f, 1E-5f, _)); } } } void VerifySeparable5(const size_t xsize, const size_t ysize, ThreadPool* pool, Rng* rng) { const Rect rect(0, 0, xsize, ysize); JXL_ASSIGN_OR_DIE(ImageF in, ImageF::Create(xsize, ysize)); GenerateImage(*rng, &in, 0.0f, 1.0f); JXL_ASSIGN_OR_DIE(ImageF out_expected, ImageF::Create(xsize, ysize)); JXL_ASSIGN_OR_DIE(ImageF out_actual, ImageF::Create(xsize, ysize)); const WeightsSeparable5& weights = WeightsSeparable5Lowpass(); SlowSeparable5(in, rect, weights, pool, &out_expected, rect); Separable5(in, rect, weights, pool, &out_actual); JXL_ASSERT_OK(VerifyRelativeError(out_expected, out_actual, 1E-5f, 1E-5f, _)); } // For all xsize/ysize and kernels: void TestConvolve() { TestNeighbors(); test::ThreadPoolForTests pool(4); EXPECT_EQ(true, RunOnPool( pool.get(), kConvolveMaxRadius, 40, ThreadPool::NoInit, [](const uint32_t task, size_t /*thread*/) { const size_t xsize = task; Rng rng(129 + 13 * xsize); ThreadPool* null_pool = nullptr; test::ThreadPoolForTests pool3(3); for (size_t ysize = kConvolveMaxRadius; ysize < 16; ++ysize) { JXL_DEBUG(JXL_DEBUG_CONVOLVE, "%" PRIuS " x %" PRIuS " (target %" PRIx64 ")===============================", xsize, ysize, static_cast(HWY_TARGET)); JXL_DEBUG(JXL_DEBUG_CONVOLVE, "Sym3------------------"); VerifySymmetric3(xsize, ysize, null_pool, &rng); VerifySymmetric3(xsize, ysize, pool3.get(), &rng); JXL_DEBUG(JXL_DEBUG_CONVOLVE, "Sym5------------------"); VerifySymmetric5(xsize, ysize, null_pool, &rng); VerifySymmetric5(xsize, ysize, pool3.get(), &rng); JXL_DEBUG(JXL_DEBUG_CONVOLVE, "Sep5------------------"); VerifySeparable5(xsize, ysize, null_pool, &rng); VerifySeparable5(xsize, ysize, pool3.get(), &rng); } }, "TestConvolve")); } // Measures durations, verifies results, prints timings. `unpredictable1` // must have value 1 (unknown to the compiler to prevent elision). template void BenchmarkConv(const char* caption, const Conv& conv, const hwy::FuncInput unpredictable1) { const size_t kNumInputs = 1; const hwy::FuncInput inputs[kNumInputs] = {unpredictable1}; hwy::Result results[kNumInputs]; const size_t kDim = 160; // in+out fit in L2 JXL_ASSIGN_OR_DIE(ImageF in, ImageF::Create(kDim, kDim)); ZeroFillImage(&in); in.Row(kDim / 2)[kDim / 2] = unpredictable1; JXL_ASSIGN_OR_DIE(ImageF out, ImageF::Create(kDim, kDim)); hwy::Params p; p.verbose = false; p.max_evals = 7; p.target_rel_mad = 0.002; const size_t num_results = MeasureClosure( [&in, &conv, &out](const hwy::FuncInput input) { conv(in, &out); return out.Row(input)[0]; }, inputs, kNumInputs, results, p); if (num_results != kNumInputs) { fprintf(stderr, "MeasureClosure failed.\n"); } for (size_t i = 0; i < num_results; ++i) { const double seconds = static_cast(results[i].ticks) / hwy::platform::InvariantTicksPerSecond(); printf("%12s: %7.2f MP/s (MAD=%4.2f%%)\n", caption, kDim * kDim * 1E-6 / seconds, static_cast(results[i].variability) * 100.0); } } struct ConvSymmetric3 { void operator()(const ImageF& in, ImageF* JXL_RESTRICT out) const { ThreadPool* null_pool = nullptr; Symmetric3(in, Rect(in), WeightsSymmetric3Lowpass(), null_pool, out); } }; struct ConvSeparable5 { void operator()(const ImageF& in, ImageF* JXL_RESTRICT out) const { ThreadPool* null_pool = nullptr; Separable5(in, Rect(in), WeightsSeparable5Lowpass(), null_pool, out); } }; void BenchmarkAll() { #if JXL_FALSE // disabled to avoid test timeouts, run manually on demand const hwy::FuncInput unpredictable1 = time(nullptr) != 1234; BenchmarkConv("Symmetric3", ConvSymmetric3(), unpredictable1); BenchmarkConv("Separable5", ConvSeparable5(), unpredictable1); #endif } // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace jxl HWY_AFTER_NAMESPACE(); #if HWY_ONCE namespace jxl { class ConvolveTest : public hwy::TestWithParamTarget {}; HWY_TARGET_INSTANTIATE_TEST_SUITE_P(ConvolveTest); HWY_EXPORT_AND_TEST_P(ConvolveTest, TestConvolve); HWY_EXPORT_AND_TEST_P(ConvolveTest, BenchmarkAll); } // namespace jxl #endif