diff options
Diffstat (limited to 'third_party/aom/test/selfguided_filter_test.cc')
-rw-r--r-- | third_party/aom/test/selfguided_filter_test.cc | 435 |
1 files changed, 435 insertions, 0 deletions
diff --git a/third_party/aom/test/selfguided_filter_test.cc b/third_party/aom/test/selfguided_filter_test.cc new file mode 100644 index 0000000000..3dd513b6e0 --- /dev/null +++ b/third_party/aom/test/selfguided_filter_test.cc @@ -0,0 +1,435 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include <ctime> +#include <tuple> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "config/av1_rtcd.h" + +#include "test/acm_random.h" +#include "test/register_state_check.h" +#include "test/util.h" + +#include "aom_ports/aom_timer.h" +#include "av1/common/mv.h" +#include "av1/common/restoration.h" + +namespace { + +using libaom_test::ACMRandom; +using std::make_tuple; +using std::tuple; + +typedef int (*SgrFunc)(const uint8_t *dat8, int width, int height, int stride, + int eps, const int *xqd, uint8_t *dst8, int dst_stride, + int32_t *tmpbuf, int bit_depth, int highbd); + +// Test parameter list: +// <tst_fun_> +typedef tuple<SgrFunc> FilterTestParam; + +class AV1SelfguidedFilterTest + : public ::testing::TestWithParam<FilterTestParam> { + public: + ~AV1SelfguidedFilterTest() override = default; + void SetUp() override {} + + protected: + void RunSpeedTest() { + tst_fun_ = GET_PARAM(0); + const int pu_width = RESTORATION_PROC_UNIT_SIZE; + const int pu_height = RESTORATION_PROC_UNIT_SIZE; + const int width = 256, height = 256, stride = 288, out_stride = 288; + const int NUM_ITERS = 2000; + int i, j, k; + + uint8_t *input_ = + (uint8_t *)aom_memalign(32, stride * (height + 32) * sizeof(uint8_t)); + ASSERT_NE(input_, nullptr); + uint8_t *output_ = (uint8_t *)aom_memalign( + 32, out_stride * (height + 32) * sizeof(uint8_t)); + ASSERT_NE(output_, nullptr); + int32_t *tmpbuf = (int32_t *)aom_memalign(32, RESTORATION_TMPBUF_SIZE); + ASSERT_NE(tmpbuf, nullptr); + uint8_t *input = input_ + stride * 16 + 16; + uint8_t *output = output_ + out_stride * 16 + 16; + + ACMRandom rnd(ACMRandom::DeterministicSeed()); + + for (i = -16; i < height + 16; ++i) + for (j = -16; j < width + 16; ++j) + input[i * stride + j] = rnd.Rand16() & 0xFF; + + int xqd[2] = { SGRPROJ_PRJ_MIN0 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 - + SGRPROJ_PRJ_MIN0), + SGRPROJ_PRJ_MIN1 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 - + SGRPROJ_PRJ_MIN1) }; + // Fix a parameter set, since the speed depends slightly on r. + // Change this to test different combinations of values of r. + int eps = 15; + + av1_loop_restoration_precal(); + + aom_usec_timer ref_timer; + aom_usec_timer_start(&ref_timer); + for (i = 0; i < NUM_ITERS; ++i) { + for (k = 0; k < height; k += pu_height) + for (j = 0; j < width; j += pu_width) { + int w = AOMMIN(pu_width, width - j); + int h = AOMMIN(pu_height, height - k); + uint8_t *input_p = input + k * stride + j; + uint8_t *output_p = output + k * out_stride + j; + const int ret_c = av1_apply_selfguided_restoration_c( + input_p, w, h, stride, eps, xqd, output_p, out_stride, tmpbuf, 8, + 0); + ASSERT_EQ(ret_c, 0); + } + } + aom_usec_timer_mark(&ref_timer); + const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer); + + aom_usec_timer tst_timer; + aom_usec_timer_start(&tst_timer); + for (i = 0; i < NUM_ITERS; ++i) { + for (k = 0; k < height; k += pu_height) + for (j = 0; j < width; j += pu_width) { + int w = AOMMIN(pu_width, width - j); + int h = AOMMIN(pu_height, height - k); + uint8_t *input_p = input + k * stride + j; + uint8_t *output_p = output + k * out_stride + j; + const int ret_tst = tst_fun_(input_p, w, h, stride, eps, xqd, + output_p, out_stride, tmpbuf, 8, 0); + ASSERT_EQ(ret_tst, 0); + } + } + aom_usec_timer_mark(&tst_timer); + const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer); + + std::cout << "[ ] C time = " << ref_time / 1000 + << " ms, SIMD time = " << tst_time / 1000 << " ms\n"; + + EXPECT_GT(ref_time, tst_time) + << "Error: AV1SelfguidedFilterTest.SpeedTest, SIMD slower than C.\n" + << "C time: " << ref_time << " us\n" + << "SIMD time: " << tst_time << " us\n"; + + aom_free(input_); + aom_free(output_); + aom_free(tmpbuf); + } + + void RunCorrectnessTest() { + tst_fun_ = GET_PARAM(0); + const int pu_width = RESTORATION_PROC_UNIT_SIZE; + const int pu_height = RESTORATION_PROC_UNIT_SIZE; + // Set the maximum width/height to test here. We actually test a small + // range of sizes *up to* this size, so that we can check, eg., + // the behaviour on tiles which are not a multiple of 4 wide. + const int max_w = 260, max_h = 260, stride = 672, out_stride = 672; + const int NUM_ITERS = 81; + int i, j, k; + + uint8_t *input_ = + (uint8_t *)aom_memalign(32, stride * (max_h + 32) * sizeof(uint8_t)); + ASSERT_NE(input_, nullptr); + uint8_t *output_ = (uint8_t *)aom_memalign( + 32, out_stride * (max_h + 32) * sizeof(uint8_t)); + ASSERT_NE(output_, nullptr); + uint8_t *output2_ = (uint8_t *)aom_memalign( + 32, out_stride * (max_h + 32) * sizeof(uint8_t)); + ASSERT_NE(output2_, nullptr); + int32_t *tmpbuf = (int32_t *)aom_memalign(32, RESTORATION_TMPBUF_SIZE); + ASSERT_NE(tmpbuf, nullptr); + + uint8_t *input = input_ + stride * 16 + 16; + uint8_t *output = output_ + out_stride * 16 + 16; + uint8_t *output2 = output2_ + out_stride * 16 + 16; + + ACMRandom rnd(ACMRandom::DeterministicSeed()); + + av1_loop_restoration_precal(); + + for (i = 0; i < NUM_ITERS; ++i) { + for (j = -16; j < max_h + 16; ++j) + for (k = -16; k < max_w + 16; ++k) + input[j * stride + k] = rnd.Rand16() & 0xFF; + + int xqd[2] = { SGRPROJ_PRJ_MIN0 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 - + SGRPROJ_PRJ_MIN0), + SGRPROJ_PRJ_MIN1 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 - + SGRPROJ_PRJ_MIN1) }; + int eps = rnd.PseudoUniform(1 << SGRPROJ_PARAMS_BITS); + + // Test various tile sizes around 256x256 + int test_w = max_w - (i / 9); + int test_h = max_h - (i % 9); + + for (k = 0; k < test_h; k += pu_height) + for (j = 0; j < test_w; j += pu_width) { + int w = AOMMIN(pu_width, test_w - j); + int h = AOMMIN(pu_height, test_h - k); + uint8_t *input_p = input + k * stride + j; + uint8_t *output_p = output + k * out_stride + j; + uint8_t *output2_p = output2 + k * out_stride + j; + const int ret_tst = tst_fun_(input_p, w, h, stride, eps, xqd, + output_p, out_stride, tmpbuf, 8, 0); + ASSERT_EQ(ret_tst, 0); + const int ret_c = av1_apply_selfguided_restoration_c( + input_p, w, h, stride, eps, xqd, output2_p, out_stride, tmpbuf, 8, + 0); + ASSERT_EQ(ret_c, 0); + } + + for (j = 0; j < test_h; ++j) + for (k = 0; k < test_w; ++k) { + ASSERT_EQ(output[j * out_stride + k], output2[j * out_stride + k]); + } + } + + aom_free(input_); + aom_free(output_); + aom_free(output2_); + aom_free(tmpbuf); + } + + private: + SgrFunc tst_fun_; +}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1SelfguidedFilterTest); + +TEST_P(AV1SelfguidedFilterTest, DISABLED_SpeedTest) { RunSpeedTest(); } +TEST_P(AV1SelfguidedFilterTest, CorrectnessTest) { RunCorrectnessTest(); } + +#if HAVE_SSE4_1 +INSTANTIATE_TEST_SUITE_P( + SSE4_1, AV1SelfguidedFilterTest, + ::testing::Values(av1_apply_selfguided_restoration_sse4_1)); +#endif + +#if HAVE_AVX2 +INSTANTIATE_TEST_SUITE_P( + AVX2, AV1SelfguidedFilterTest, + ::testing::Values(av1_apply_selfguided_restoration_avx2)); +#endif + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P( + NEON, AV1SelfguidedFilterTest, + ::testing::Values(av1_apply_selfguided_restoration_neon)); +#endif + +#if CONFIG_AV1_HIGHBITDEPTH +// Test parameter list: +// <tst_fun_, bit_depth> +typedef tuple<SgrFunc, int> HighbdFilterTestParam; + +class AV1HighbdSelfguidedFilterTest + : public ::testing::TestWithParam<HighbdFilterTestParam> { + public: + ~AV1HighbdSelfguidedFilterTest() override = default; + void SetUp() override {} + + protected: + void RunSpeedTest() { + tst_fun_ = GET_PARAM(0); + const int pu_width = RESTORATION_PROC_UNIT_SIZE; + const int pu_height = RESTORATION_PROC_UNIT_SIZE; + const int width = 256, height = 256, stride = 288, out_stride = 288; + const int NUM_ITERS = 2000; + int i, j, k; + int bit_depth = GET_PARAM(1); + int mask = (1 << bit_depth) - 1; + + uint16_t *input_ = + (uint16_t *)aom_memalign(32, stride * (height + 32) * sizeof(uint16_t)); + ASSERT_NE(input_, nullptr); + uint16_t *output_ = (uint16_t *)aom_memalign( + 32, out_stride * (height + 32) * sizeof(uint16_t)); + ASSERT_NE(output_, nullptr); + int32_t *tmpbuf = (int32_t *)aom_memalign(32, RESTORATION_TMPBUF_SIZE); + ASSERT_NE(tmpbuf, nullptr); + uint16_t *input = input_ + stride * 16 + 16; + uint16_t *output = output_ + out_stride * 16 + 16; + + ACMRandom rnd(ACMRandom::DeterministicSeed()); + + for (i = -16; i < height + 16; ++i) + for (j = -16; j < width + 16; ++j) + input[i * stride + j] = rnd.Rand16() & mask; + + int xqd[2] = { SGRPROJ_PRJ_MIN0 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 - + SGRPROJ_PRJ_MIN0), + SGRPROJ_PRJ_MIN1 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 - + SGRPROJ_PRJ_MIN1) }; + // Fix a parameter set, since the speed depends slightly on r. + // Change this to test different combinations of values of r. + int eps = 15; + + av1_loop_restoration_precal(); + + aom_usec_timer ref_timer; + aom_usec_timer_start(&ref_timer); + for (i = 0; i < NUM_ITERS; ++i) { + for (k = 0; k < height; k += pu_height) + for (j = 0; j < width; j += pu_width) { + int w = AOMMIN(pu_width, width - j); + int h = AOMMIN(pu_height, height - k); + uint16_t *input_p = input + k * stride + j; + uint16_t *output_p = output + k * out_stride + j; + av1_apply_selfguided_restoration_c( + CONVERT_TO_BYTEPTR(input_p), w, h, stride, eps, xqd, + CONVERT_TO_BYTEPTR(output_p), out_stride, tmpbuf, bit_depth, 1); + } + } + aom_usec_timer_mark(&ref_timer); + const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer); + + aom_usec_timer tst_timer; + aom_usec_timer_start(&tst_timer); + for (i = 0; i < NUM_ITERS; ++i) { + for (k = 0; k < height; k += pu_height) + for (j = 0; j < width; j += pu_width) { + int w = AOMMIN(pu_width, width - j); + int h = AOMMIN(pu_height, height - k); + uint16_t *input_p = input + k * stride + j; + uint16_t *output_p = output + k * out_stride + j; + tst_fun_(CONVERT_TO_BYTEPTR(input_p), w, h, stride, eps, xqd, + CONVERT_TO_BYTEPTR(output_p), out_stride, tmpbuf, bit_depth, + 1); + } + } + aom_usec_timer_mark(&tst_timer); + const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer); + + std::cout << "[ ] C time = " << ref_time / 1000 + << " ms, SIMD time = " << tst_time / 1000 << " ms\n"; + + EXPECT_GT(ref_time, tst_time) + << "Error: AV1HighbdSelfguidedFilterTest.SpeedTest, SIMD slower than " + "C.\n" + << "C time: " << ref_time << " us\n" + << "SIMD time: " << tst_time << " us\n"; + + aom_free(input_); + aom_free(output_); + aom_free(tmpbuf); + } + + void RunCorrectnessTest() { + tst_fun_ = GET_PARAM(0); + const int pu_width = RESTORATION_PROC_UNIT_SIZE; + const int pu_height = RESTORATION_PROC_UNIT_SIZE; + // Set the maximum width/height to test here. We actually test a small + // range of sizes *up to* this size, so that we can check, eg., + // the behaviour on tiles which are not a multiple of 4 wide. + const int max_w = 260, max_h = 260, stride = 672, out_stride = 672; + const int NUM_ITERS = 81; + int i, j, k; + int bit_depth = GET_PARAM(1); + int mask = (1 << bit_depth) - 1; + + uint16_t *input_ = + (uint16_t *)aom_memalign(32, stride * (max_h + 32) * sizeof(uint16_t)); + ASSERT_NE(input_, nullptr); + uint16_t *output_ = (uint16_t *)aom_memalign( + 32, out_stride * (max_h + 32) * sizeof(uint16_t)); + ASSERT_NE(output_, nullptr); + uint16_t *output2_ = (uint16_t *)aom_memalign( + 32, out_stride * (max_h + 32) * sizeof(uint16_t)); + ASSERT_NE(output2_, nullptr); + int32_t *tmpbuf = (int32_t *)aom_memalign(32, RESTORATION_TMPBUF_SIZE); + ASSERT_NE(tmpbuf, nullptr); + + uint16_t *input = input_ + stride * 16 + 16; + uint16_t *output = output_ + out_stride * 16 + 16; + uint16_t *output2 = output2_ + out_stride * 16 + 16; + + ACMRandom rnd(ACMRandom::DeterministicSeed()); + + av1_loop_restoration_precal(); + + for (i = 0; i < NUM_ITERS; ++i) { + for (j = -16; j < max_h + 16; ++j) + for (k = -16; k < max_w + 16; ++k) + input[j * stride + k] = rnd.Rand16() & mask; + + int xqd[2] = { SGRPROJ_PRJ_MIN0 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 - + SGRPROJ_PRJ_MIN0), + SGRPROJ_PRJ_MIN1 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 - + SGRPROJ_PRJ_MIN1) }; + int eps = rnd.PseudoUniform(1 << SGRPROJ_PARAMS_BITS); + + // Test various tile sizes around 256x256 + int test_w = max_w - (i / 9); + int test_h = max_h - (i % 9); + + for (k = 0; k < test_h; k += pu_height) + for (j = 0; j < test_w; j += pu_width) { + int w = AOMMIN(pu_width, test_w - j); + int h = AOMMIN(pu_height, test_h - k); + uint16_t *input_p = input + k * stride + j; + uint16_t *output_p = output + k * out_stride + j; + uint16_t *output2_p = output2 + k * out_stride + j; + tst_fun_(CONVERT_TO_BYTEPTR(input_p), w, h, stride, eps, xqd, + CONVERT_TO_BYTEPTR(output_p), out_stride, tmpbuf, bit_depth, + 1); + av1_apply_selfguided_restoration_c( + CONVERT_TO_BYTEPTR(input_p), w, h, stride, eps, xqd, + CONVERT_TO_BYTEPTR(output2_p), out_stride, tmpbuf, bit_depth, 1); + } + + for (j = 0; j < test_h; ++j) + for (k = 0; k < test_w; ++k) + ASSERT_EQ(output[j * out_stride + k], output2[j * out_stride + k]); + } + + aom_free(input_); + aom_free(output_); + aom_free(output2_); + aom_free(tmpbuf); + } + + private: + SgrFunc tst_fun_; +}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1HighbdSelfguidedFilterTest); + +TEST_P(AV1HighbdSelfguidedFilterTest, DISABLED_SpeedTest) { RunSpeedTest(); } +TEST_P(AV1HighbdSelfguidedFilterTest, CorrectnessTest) { RunCorrectnessTest(); } + +#if HAVE_SSE4_1 +const int highbd_params_sse4_1[] = { 8, 10, 12 }; +INSTANTIATE_TEST_SUITE_P( + SSE4_1, AV1HighbdSelfguidedFilterTest, + ::testing::Combine( + ::testing::Values(av1_apply_selfguided_restoration_sse4_1), + ::testing::ValuesIn(highbd_params_sse4_1))); +#endif + +#if HAVE_AVX2 +const int highbd_params_avx2[] = { 8, 10, 12 }; +INSTANTIATE_TEST_SUITE_P( + AVX2, AV1HighbdSelfguidedFilterTest, + ::testing::Combine(::testing::Values(av1_apply_selfguided_restoration_avx2), + ::testing::ValuesIn(highbd_params_avx2))); +#endif + +#if HAVE_NEON +const int highbd_params_neon[] = { 8, 10, 12 }; +INSTANTIATE_TEST_SUITE_P( + NEON, AV1HighbdSelfguidedFilterTest, + ::testing::Combine(::testing::Values(av1_apply_selfguided_restoration_neon), + ::testing::ValuesIn(highbd_params_neon))); +#endif +#endif // CONFIG_AV1_HIGHBITDEPTH +} // namespace |