diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
commit | 26a029d407be480d791972afb5975cf62c9360a6 (patch) | |
tree | f435a8308119effd964b339f76abb83a57c29483 /third_party/aom/test/avg_test.cc | |
parent | Initial commit. (diff) | |
download | firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz firefox-26a029d407be480d791972afb5975cf62c9360a6.zip |
Adding upstream version 124.0.1.upstream/124.0.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/aom/test/avg_test.cc')
-rw-r--r-- | third_party/aom/test/avg_test.cc | 1150 |
1 files changed, 1150 insertions, 0 deletions
diff --git a/third_party/aom/test/avg_test.cc b/third_party/aom/test/avg_test.cc new file mode 100644 index 0000000000..6f4c2ff332 --- /dev/null +++ b/third_party/aom/test/avg_test.cc @@ -0,0 +1,1150 @@ +/* + * Copyright (c) 2019, Alliance for Open Media. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <stdlib.h> +#include <ostream> +#include <string> +#include <tuple> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "config/aom_config.h" +#include "config/aom_dsp_rtcd.h" + +#include "aom_ports/aom_timer.h" +#include "aom_ports/mem.h" +#include "test/acm_random.h" +#include "test/register_state_check.h" +#include "test/util.h" + +namespace { + +using libaom_test::ACMRandom; + +template <typename Pixel> +class AverageTestBase : public ::testing::Test { + public: + AverageTestBase(int width, int height, int bit_depth = 8) + : width_(width), height_(height), source_data_(nullptr), + source_stride_(0), bit_depth_(bit_depth) {} + + void TearDown() override { + aom_free(source_data_); + source_data_ = nullptr; + } + + protected: + // Handle blocks up to 4 blocks 64x64 with stride up to 128 + static const int kDataAlignment = 16; + static const int kDataBlockWidth = 128; + static const int kDataBlockHeight = 128; + static const int kDataBlockSize = kDataBlockWidth * kDataBlockHeight; + + void SetUp() override { + const testing::TestInfo *const test_info = + testing::UnitTest::GetInstance()->current_test_info(); + // Skip the speed test for C code as the baseline uses the same function. + if (std::string(test_info->test_suite_name()).find("C/") == 0 && + std::string(test_info->name()).find("DISABLED_Speed") != + std::string::npos) { + GTEST_SKIP(); + } + + source_data_ = static_cast<Pixel *>( + aom_memalign(kDataAlignment, kDataBlockSize * sizeof(source_data_[0]))); + ASSERT_NE(source_data_, nullptr); + memset(source_data_, 0, kDataBlockSize * sizeof(source_data_[0])); + source_stride_ = (width_ + 31) & ~31; + bit_depth_ = 8; + rnd_.Reset(ACMRandom::DeterministicSeed()); + } + + // Sum Pixels + static unsigned int ReferenceAverage8x8(const Pixel *source, int pitch) { + unsigned int average = 0; + for (int h = 0; h < 8; ++h) { + for (int w = 0; w < 8; ++w) average += source[h * pitch + w]; + } + return (average + 32) >> 6; + } + + static void ReferenceAverage8x8_quad(const uint8_t *source, int pitch, + int x16_idx, int y16_idx, int *avg) { + for (int k = 0; k < 4; k++) { + int average = 0; + int x8_idx = x16_idx + ((k & 1) << 3); + int y8_idx = y16_idx + ((k >> 1) << 3); + for (int h = 0; h < 8; ++h) { + for (int w = 0; w < 8; ++w) + average += source[(h + y8_idx) * pitch + w + x8_idx]; + } + avg[k] = (average + 32) >> 6; + } + } + + static unsigned int ReferenceAverage4x4(const Pixel *source, int pitch) { + unsigned int average = 0; + for (int h = 0; h < 4; ++h) { + for (int w = 0; w < 4; ++w) average += source[h * pitch + w]; + } + return (average + 8) >> 4; + } + + void FillConstant(Pixel fill_constant) { + for (int i = 0; i < width_ * height_; ++i) { + source_data_[i] = fill_constant; + } + } + + void FillRandom() { + for (int i = 0; i < width_ * height_; ++i) { + source_data_[i] = rnd_.Rand16() & ((1 << bit_depth_) - 1); + } + } + + int width_, height_; + Pixel *source_data_; + int source_stride_; + int bit_depth_; + + ACMRandom rnd_; +}; +typedef unsigned int (*AverageFunction)(const uint8_t *s, int pitch); + +// Arguments: width, height, bit_depth, buffer start offset, block size, avg +// function. +typedef std::tuple<int, int, int, int, int, AverageFunction> AvgFunc; + +template <typename Pixel> +class AverageTest : public AverageTestBase<Pixel>, + public ::testing::WithParamInterface<AvgFunc> { + public: + AverageTest() + : AverageTestBase<Pixel>(GET_PARAM(0), GET_PARAM(1), GET_PARAM(2)) {} + + protected: + using AverageTestBase<Pixel>::source_data_; + using AverageTestBase<Pixel>::source_stride_; + using AverageTestBase<Pixel>::ReferenceAverage8x8; + using AverageTestBase<Pixel>::ReferenceAverage4x4; + using AverageTestBase<Pixel>::FillConstant; + using AverageTestBase<Pixel>::FillRandom; + + void CheckAverages() { + const int block_size = GET_PARAM(4); + unsigned int expected = 0; + + // The reference frame, but not the source frame, may be unaligned for + // certain types of searches. + const Pixel *const src = source_data_ + GET_PARAM(3); + if (block_size == 8) { + expected = ReferenceAverage8x8(src, source_stride_); + } else if (block_size == 4) { + expected = ReferenceAverage4x4(src, source_stride_); + } + + aom_usec_timer timer; + unsigned int actual; + if (sizeof(Pixel) == 2) { +#if CONFIG_AV1_HIGHBITDEPTH + AverageFunction avg_c = + (block_size == 8) ? aom_highbd_avg_8x8_c : aom_highbd_avg_4x4_c; + // To avoid differences in optimization with the local Reference*() + // functions the C implementation is used as a baseline. + aom_usec_timer_start(&timer); + avg_c(CONVERT_TO_BYTEPTR(src), source_stride_); + aom_usec_timer_mark(&timer); + ref_elapsed_time_ += aom_usec_timer_elapsed(&timer); + + AverageFunction avg_opt = GET_PARAM(5); + API_REGISTER_STATE_CHECK( + aom_usec_timer_start(&timer); + actual = avg_opt(CONVERT_TO_BYTEPTR(src), source_stride_); + aom_usec_timer_mark(&timer)); +#endif // CONFIG_AV1_HIGHBITDEPTH + } else { + ASSERT_EQ(sizeof(Pixel), 1u); + + AverageFunction avg_c = (block_size == 8) ? aom_avg_8x8_c : aom_avg_4x4_c; + aom_usec_timer_start(&timer); + avg_c(reinterpret_cast<const uint8_t *>(src), source_stride_); + aom_usec_timer_mark(&timer); + ref_elapsed_time_ += aom_usec_timer_elapsed(&timer); + + AverageFunction avg_opt = GET_PARAM(5); + API_REGISTER_STATE_CHECK( + aom_usec_timer_start(&timer); + actual = + avg_opt(reinterpret_cast<const uint8_t *>(src), source_stride_); + aom_usec_timer_mark(&timer)); + } + opt_elapsed_time_ += aom_usec_timer_elapsed(&timer); + + EXPECT_EQ(expected, actual); + } + + void TestConstantValue(Pixel value) { + FillConstant(value); + CheckAverages(); + } + + void TestRandom(int iterations = 1000) { + for (int i = 0; i < iterations; i++) { + FillRandom(); + CheckAverages(); + } + } + + void PrintTimingStats() const { + printf( + "block_size = %d \t ref_time = %d \t simd_time = %d \t Gain = %4.2f\n", + GET_PARAM(4), static_cast<int>(ref_elapsed_time_), + static_cast<int>(opt_elapsed_time_), + (static_cast<float>(ref_elapsed_time_) / + static_cast<float>(opt_elapsed_time_))); + } + + int64_t ref_elapsed_time_ = 0; + int64_t opt_elapsed_time_ = 0; +}; + +typedef void (*AverageFunction_8x8_quad)(const uint8_t *s, int pitch, int x_idx, + int y_idx, int *avg); + +// Arguments: width, height, bit_depth, buffer start offset, block size, avg +// function. +typedef std::tuple<int, int, int, int, int, AverageFunction_8x8_quad> + AvgFunc_8x8_quad; + +template <typename Pixel> +class AverageTest_8x8_quad + : public AverageTestBase<Pixel>, + public ::testing::WithParamInterface<AvgFunc_8x8_quad> { + public: + AverageTest_8x8_quad() + : AverageTestBase<Pixel>(GET_PARAM(0), GET_PARAM(1), GET_PARAM(2)) {} + + protected: + using AverageTestBase<Pixel>::source_data_; + using AverageTestBase<Pixel>::source_stride_; + using AverageTestBase<Pixel>::ReferenceAverage8x8_quad; + using AverageTestBase<Pixel>::FillConstant; + using AverageTestBase<Pixel>::FillRandom; + + void CheckAveragesAt(int iterations, int x16_idx, int y16_idx) { + ASSERT_EQ(sizeof(Pixel), 1u); + const int block_size = GET_PARAM(4); + (void)block_size; + int expected[4] = { 0 }; + + // The reference frame, but not the source frame, may be unaligned for + // certain types of searches. + const Pixel *const src = source_data_ + GET_PARAM(3); + ReferenceAverage8x8_quad(src, source_stride_, x16_idx, y16_idx, expected); + + aom_usec_timer timer; + int expected_c[4] = { 0 }; + int actual[4] = { 0 }; + AverageFunction_8x8_quad avg_c = aom_avg_8x8_quad_c; + aom_usec_timer_start(&timer); + for (int i = 0; i < iterations; i++) { + avg_c(reinterpret_cast<const uint8_t *>(src), source_stride_, x16_idx, + y16_idx, expected_c); + } + aom_usec_timer_mark(&timer); + ref_elapsed_time_ += aom_usec_timer_elapsed(&timer); + + AverageFunction_8x8_quad avg_opt = GET_PARAM(5); + aom_usec_timer_start(&timer); + for (int i = 0; i < iterations; i++) { + avg_opt(reinterpret_cast<const uint8_t *>(src), source_stride_, x16_idx, + y16_idx, actual); + } + aom_usec_timer_mark(&timer); + opt_elapsed_time_ += aom_usec_timer_elapsed(&timer); + + for (int k = 0; k < 4; k++) { + EXPECT_EQ(expected[k], actual[k]); + EXPECT_EQ(expected_c[k], actual[k]); + } + + // Print scaling information only when Speed test is called. + if (iterations > 1) { + printf("ref_time = %d \t simd_time = %d \t Gain = %4.2f\n", + static_cast<int>(ref_elapsed_time_), + static_cast<int>(opt_elapsed_time_), + (static_cast<float>(ref_elapsed_time_) / + static_cast<float>(opt_elapsed_time_))); + } + } + + void CheckAverages() { + for (int x16_idx = 0; x16_idx < this->kDataBlockWidth / 8; x16_idx += 2) + for (int y16_idx = 0; y16_idx < this->kDataBlockHeight / 8; y16_idx += 2) + CheckAveragesAt(1, x16_idx, y16_idx); + } + + void TestConstantValue(Pixel value) { + FillConstant(value); + CheckAverages(); + } + + void TestRandom() { + FillRandom(); + CheckAverages(); + } + + void TestSpeed() { + FillRandom(); + CheckAveragesAt(1000000, 0, 0); + } + + int64_t ref_elapsed_time_ = 0; + int64_t opt_elapsed_time_ = 0; +}; + +using AverageTest8bpp = AverageTest<uint8_t>; + +TEST_P(AverageTest8bpp, MinValue) { TestConstantValue(0); } + +TEST_P(AverageTest8bpp, MaxValue) { TestConstantValue(255); } + +TEST_P(AverageTest8bpp, Random) { TestRandom(); } + +TEST_P(AverageTest8bpp, DISABLED_Speed) { + TestRandom(1000000); + PrintTimingStats(); +} + +using AvgTest8bpp_avg_8x8_quad = AverageTest_8x8_quad<uint8_t>; + +TEST_P(AvgTest8bpp_avg_8x8_quad, MinValue) { TestConstantValue(0); } + +TEST_P(AvgTest8bpp_avg_8x8_quad, MaxValue) { TestConstantValue(255); } + +TEST_P(AvgTest8bpp_avg_8x8_quad, Random) { TestRandom(); } + +TEST_P(AvgTest8bpp_avg_8x8_quad, DISABLED_Speed) { TestSpeed(); } + +#if CONFIG_AV1_HIGHBITDEPTH +using AverageTestHbd = AverageTest<uint16_t>; + +TEST_P(AverageTestHbd, MinValue) { TestConstantValue(0); } + +TEST_P(AverageTestHbd, MaxValue10bit) { TestConstantValue(1023); } +TEST_P(AverageTestHbd, MaxValue12bit) { TestConstantValue(4095); } + +TEST_P(AverageTestHbd, Random) { TestRandom(); } + +TEST_P(AverageTestHbd, DISABLED_Speed) { + TestRandom(1000000); + PrintTimingStats(); +} +#endif // CONFIG_AV1_HIGHBITDEPTH + +typedef void (*IntProRowFunc)(int16_t *hbuf, uint8_t const *ref, + const int ref_stride, const int width, + const int height, int norm_factor); + +// Params: width, height, asm function, c function. +typedef std::tuple<int, int, IntProRowFunc, IntProRowFunc> IntProRowParam; + +class IntProRowTest : public AverageTestBase<uint8_t>, + public ::testing::WithParamInterface<IntProRowParam> { + public: + IntProRowTest() + : AverageTestBase(GET_PARAM(0), GET_PARAM(1)), hbuf_asm_(nullptr), + hbuf_c_(nullptr) { + asm_func_ = GET_PARAM(2); + c_func_ = GET_PARAM(3); + } + + void set_norm_factor() { + if (height_ == 128) + norm_factor_ = 6; + else if (height_ == 64) + norm_factor_ = 5; + else if (height_ == 32) + norm_factor_ = 4; + else if (height_ == 16) + norm_factor_ = 3; + } + + protected: + void SetUp() override { + source_data_ = static_cast<uint8_t *>( + aom_memalign(kDataAlignment, kDataBlockSize * sizeof(source_data_[0]))); + ASSERT_NE(source_data_, nullptr); + + hbuf_asm_ = static_cast<int16_t *>( + aom_memalign(kDataAlignment, sizeof(*hbuf_asm_) * width_)); + ASSERT_NE(hbuf_asm_, nullptr); + hbuf_c_ = static_cast<int16_t *>( + aom_memalign(kDataAlignment, sizeof(*hbuf_c_) * width_)); + ASSERT_NE(hbuf_c_, nullptr); + } + + void TearDown() override { + aom_free(source_data_); + source_data_ = nullptr; + aom_free(hbuf_c_); + hbuf_c_ = nullptr; + aom_free(hbuf_asm_); + hbuf_asm_ = nullptr; + } + + void RunComparison() { + set_norm_factor(); + API_REGISTER_STATE_CHECK( + c_func_(hbuf_c_, source_data_, width_, width_, height_, norm_factor_)); + API_REGISTER_STATE_CHECK(asm_func_(hbuf_asm_, source_data_, width_, width_, + height_, norm_factor_)); + EXPECT_EQ(0, memcmp(hbuf_c_, hbuf_asm_, sizeof(*hbuf_c_) * width_)) + << "Output mismatch\n"; + } + + void RunSpeedTest() { + const int numIter = 5000000; + set_norm_factor(); + printf("Blk_Size=%dx%d: number of iteration is %d \n", width_, height_, + numIter); + aom_usec_timer c_timer_; + aom_usec_timer_start(&c_timer_); + for (int i = 0; i < numIter; i++) { + c_func_(hbuf_c_, source_data_, width_, width_, height_, norm_factor_); + } + aom_usec_timer_mark(&c_timer_); + + aom_usec_timer asm_timer_; + aom_usec_timer_start(&asm_timer_); + + for (int i = 0; i < numIter; i++) { + asm_func_(hbuf_asm_, source_data_, width_, width_, height_, norm_factor_); + } + aom_usec_timer_mark(&asm_timer_); + + const int c_sum_time = static_cast<int>(aom_usec_timer_elapsed(&c_timer_)); + const int asm_sum_time = + static_cast<int>(aom_usec_timer_elapsed(&asm_timer_)); + + printf("c_time = %d \t simd_time = %d \t Gain = %4.2f \n", c_sum_time, + asm_sum_time, + (static_cast<float>(c_sum_time) / static_cast<float>(asm_sum_time))); + + EXPECT_EQ(0, memcmp(hbuf_c_, hbuf_asm_, sizeof(*hbuf_c_) * width_)) + << "Output mismatch\n"; + } + + private: + IntProRowFunc asm_func_; + IntProRowFunc c_func_; + int16_t *hbuf_asm_; + int16_t *hbuf_c_; + int norm_factor_; +}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(IntProRowTest); + +typedef void (*IntProColFunc)(int16_t *vbuf, uint8_t const *ref, + const int ref_stride, const int width, + const int height, int norm_factor); + +// Params: width, height, asm function, c function. +typedef std::tuple<int, int, IntProColFunc, IntProColFunc> IntProColParam; + +class IntProColTest : public AverageTestBase<uint8_t>, + public ::testing::WithParamInterface<IntProColParam> { + public: + IntProColTest() + : AverageTestBase(GET_PARAM(0), GET_PARAM(1)), vbuf_asm_(nullptr), + vbuf_c_(nullptr) { + asm_func_ = GET_PARAM(2); + c_func_ = GET_PARAM(3); + } + + protected: + void SetUp() override { + source_data_ = static_cast<uint8_t *>( + aom_memalign(kDataAlignment, kDataBlockSize * sizeof(source_data_[0]))); + ASSERT_NE(source_data_, nullptr); + + vbuf_asm_ = static_cast<int16_t *>( + aom_memalign(kDataAlignment, sizeof(*vbuf_asm_) * width_)); + ASSERT_NE(vbuf_asm_, nullptr); + vbuf_c_ = static_cast<int16_t *>( + aom_memalign(kDataAlignment, sizeof(*vbuf_c_) * width_)); + ASSERT_NE(vbuf_c_, nullptr); + } + + void TearDown() override { + aom_free(source_data_); + source_data_ = nullptr; + aom_free(vbuf_c_); + vbuf_c_ = nullptr; + aom_free(vbuf_asm_); + vbuf_asm_ = nullptr; + } + + void RunComparison() { + int norm_factor_ = 3 + (width_ >> 5); + API_REGISTER_STATE_CHECK( + c_func_(vbuf_c_, source_data_, width_, width_, height_, norm_factor_)); + API_REGISTER_STATE_CHECK(asm_func_(vbuf_asm_, source_data_, width_, width_, + height_, norm_factor_)); + EXPECT_EQ(0, memcmp(vbuf_c_, vbuf_asm_, sizeof(*vbuf_c_) * height_)) + << "Output mismatch\n"; + } + void RunSpeedTest() { + const int numIter = 5000000; + printf("Blk_Size=%dx%d: number of iteration is %d \n", width_, height_, + numIter); + int norm_factor_ = 3 + (width_ >> 5); + aom_usec_timer c_timer_; + aom_usec_timer_start(&c_timer_); + for (int i = 0; i < numIter; i++) { + c_func_(vbuf_c_, source_data_, width_, width_, height_, norm_factor_); + } + aom_usec_timer_mark(&c_timer_); + + aom_usec_timer asm_timer_; + aom_usec_timer_start(&asm_timer_); + + for (int i = 0; i < numIter; i++) { + asm_func_(vbuf_asm_, source_data_, width_, width_, height_, norm_factor_); + } + aom_usec_timer_mark(&asm_timer_); + + const int c_sum_time = static_cast<int>(aom_usec_timer_elapsed(&c_timer_)); + const int asm_sum_time = + static_cast<int>(aom_usec_timer_elapsed(&asm_timer_)); + + printf("c_time = %d \t simd_time = %d \t Gain = %4.2f \n", c_sum_time, + asm_sum_time, + (static_cast<float>(c_sum_time) / static_cast<float>(asm_sum_time))); + + EXPECT_EQ(0, memcmp(vbuf_c_, vbuf_asm_, sizeof(*vbuf_c_) * height_)) + << "Output mismatch\n"; + } + + private: + IntProColFunc asm_func_; + IntProColFunc c_func_; + int16_t *vbuf_asm_; + int16_t *vbuf_c_; +}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(IntProColTest); + +TEST_P(IntProRowTest, MinValue) { + FillConstant(0); + RunComparison(); +} + +TEST_P(IntProRowTest, MaxValue) { + FillConstant(255); + RunComparison(); +} + +TEST_P(IntProRowTest, Random) { + FillRandom(); + RunComparison(); +} + +TEST_P(IntProRowTest, DISABLED_Speed) { + FillRandom(); + RunSpeedTest(); +} + +TEST_P(IntProColTest, MinValue) { + FillConstant(0); + RunComparison(); +} + +TEST_P(IntProColTest, MaxValue) { + FillConstant(255); + RunComparison(); +} + +TEST_P(IntProColTest, Random) { + FillRandom(); + RunComparison(); +} + +TEST_P(IntProColTest, DISABLED_Speed) { + FillRandom(); + RunSpeedTest(); +} +class VectorVarTestBase : public ::testing::Test { + public: + explicit VectorVarTestBase(int bwl) { m_bwl = bwl; } + VectorVarTestBase() = default; + ~VectorVarTestBase() override = default; + + protected: + static const int kDataAlignment = 16; + + void SetUp() override { + width = 4 << m_bwl; + + ref_vector = static_cast<int16_t *>( + aom_memalign(kDataAlignment, width * sizeof(ref_vector[0]))); + ASSERT_NE(ref_vector, nullptr); + src_vector = static_cast<int16_t *>( + aom_memalign(kDataAlignment, width * sizeof(src_vector[0]))); + ASSERT_NE(src_vector, nullptr); + + rnd_.Reset(ACMRandom::DeterministicSeed()); + } + void TearDown() override { + aom_free(ref_vector); + ref_vector = nullptr; + aom_free(src_vector); + src_vector = nullptr; + } + + void FillConstant(int16_t fill_constant_ref, int16_t fill_constant_src) { + for (int i = 0; i < width; ++i) { + ref_vector[i] = fill_constant_ref; + src_vector[i] = fill_constant_src; + } + } + + void FillRandom() { + for (int i = 0; i < width; ++i) { + ref_vector[i] = + rnd_.Rand16() % max_range; // acc. aom_vector_var_c brief. + src_vector[i] = rnd_.Rand16() % max_range; + } + } + + int width; + int m_bwl; + int16_t *ref_vector; + int16_t *src_vector; + ACMRandom rnd_; + + static const int max_range = 510; + static const int num_random_cmp = 50; +}; + +typedef int (*VectorVarFunc)(const int16_t *ref, const int16_t *src, + const int bwl); + +typedef std::tuple<int, VectorVarFunc, VectorVarFunc> VecVarFunc; + +class VectorVarTest : public VectorVarTestBase, + public ::testing::WithParamInterface<VecVarFunc> { + public: + VectorVarTest() + : VectorVarTestBase(GET_PARAM(0)), c_func(GET_PARAM(1)), + simd_func(GET_PARAM(2)) {} + + protected: + int calcVarC() { return c_func(ref_vector, src_vector, m_bwl); } + int calcVarSIMD() { return simd_func(ref_vector, src_vector, m_bwl); } + + VectorVarFunc c_func; + VectorVarFunc simd_func; +}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(VectorVarTest); + +TEST_P(VectorVarTest, MaxVar) { + FillConstant(0, max_range); + int c_var = calcVarC(); + int simd_var = calcVarSIMD(); + ASSERT_EQ(c_var, simd_var); +} +TEST_P(VectorVarTest, MaxVarRev) { + FillConstant(max_range, 0); + int c_var = calcVarC(); + int simd_var = calcVarSIMD(); + ASSERT_EQ(c_var, simd_var); +} +TEST_P(VectorVarTest, ZeroDiff) { + FillConstant(0, 0); + int c_var = calcVarC(); + int simd_var = calcVarSIMD(); + ASSERT_EQ(c_var, simd_var); +} +TEST_P(VectorVarTest, ZeroDiff2) { + FillConstant(max_range, max_range); + int c_var = calcVarC(); + int simd_var = calcVarSIMD(); + ASSERT_EQ(c_var, simd_var); +} +TEST_P(VectorVarTest, Constant) { + FillConstant(30, 90); + int c_var = calcVarC(); + int simd_var = calcVarSIMD(); + ASSERT_EQ(c_var, simd_var); +} +TEST_P(VectorVarTest, Random) { + for (size_t i = 0; i < num_random_cmp; i++) { + FillRandom(); + int c_var = calcVarC(); + int simd_var = calcVarSIMD(); + ASSERT_EQ(c_var, simd_var); + } +} +TEST_P(VectorVarTest, DISABLED_Speed) { + FillRandom(); + const int numIter = 5000000; + printf("Width = %d number of iteration is %d \n", width, numIter); + + int sum_c_var = 0; + int c_var = 0; + + aom_usec_timer c_timer_; + aom_usec_timer_start(&c_timer_); + for (size_t i = 0; i < numIter; i++) { + c_var = calcVarC(); + sum_c_var += c_var; + } + aom_usec_timer_mark(&c_timer_); + + int simd_var = 0; + int sum_simd_var = 0; + aom_usec_timer simd_timer_; + aom_usec_timer_start(&simd_timer_); + for (size_t i = 0; i < numIter; i++) { + simd_var = calcVarSIMD(); + sum_simd_var += simd_var; + } + aom_usec_timer_mark(&simd_timer_); + + const int c_sum_time = static_cast<int>(aom_usec_timer_elapsed(&c_timer_)); + const int simd_sum_time = + static_cast<int>(aom_usec_timer_elapsed(&simd_timer_)); + + printf("c_time = %d \t simd_time = %d \t Gain = %4.2f \n", c_sum_time, + simd_sum_time, + (static_cast<float>(c_sum_time) / static_cast<float>(simd_sum_time))); + + EXPECT_EQ(c_var, simd_var) << "Output mismatch \n"; + EXPECT_EQ(sum_c_var, sum_simd_var) << "Output mismatch \n"; +} + +using std::make_tuple; + +INSTANTIATE_TEST_SUITE_P( + C, AverageTest8bpp, + ::testing::Values(make_tuple(16, 16, 8, 1, 8, &aom_avg_8x8_c), + make_tuple(16, 16, 8, 1, 4, &aom_avg_4x4_c))); + +INSTANTIATE_TEST_SUITE_P( + C, AvgTest8bpp_avg_8x8_quad, + ::testing::Values(make_tuple(16, 16, 8, 0, 16, &aom_avg_8x8_quad_c), + make_tuple(32, 32, 8, 16, 16, &aom_avg_8x8_quad_c), + make_tuple(32, 32, 8, 8, 16, &aom_avg_8x8_quad_c))); + +#if HAVE_SSE2 +INSTANTIATE_TEST_SUITE_P( + SSE2, AverageTest8bpp, + ::testing::Values(make_tuple(16, 16, 8, 0, 8, &aom_avg_8x8_sse2), + make_tuple(16, 16, 8, 5, 8, &aom_avg_8x8_sse2), + make_tuple(32, 32, 8, 15, 8, &aom_avg_8x8_sse2), + make_tuple(16, 16, 8, 0, 4, &aom_avg_4x4_sse2), + make_tuple(16, 16, 8, 5, 4, &aom_avg_4x4_sse2), + make_tuple(32, 32, 8, 15, 4, &aom_avg_4x4_sse2))); + +INSTANTIATE_TEST_SUITE_P( + SSE2, AvgTest8bpp_avg_8x8_quad, + ::testing::Values(make_tuple(16, 16, 8, 0, 16, &aom_avg_8x8_quad_sse2), + make_tuple(32, 32, 8, 16, 16, &aom_avg_8x8_quad_sse2), + make_tuple(32, 32, 8, 8, 16, &aom_avg_8x8_quad_sse2))); + +INSTANTIATE_TEST_SUITE_P( + SSE2, IntProRowTest, + ::testing::Values( + make_tuple(16, 16, &aom_int_pro_row_sse2, &aom_int_pro_row_c), + make_tuple(32, 32, &aom_int_pro_row_sse2, &aom_int_pro_row_c), + make_tuple(64, 64, &aom_int_pro_row_sse2, &aom_int_pro_row_c), + make_tuple(128, 128, &aom_int_pro_row_sse2, &aom_int_pro_row_c))); + +INSTANTIATE_TEST_SUITE_P( + SSE2, IntProColTest, + ::testing::Values( + make_tuple(16, 16, &aom_int_pro_col_sse2, &aom_int_pro_col_c), + make_tuple(32, 32, &aom_int_pro_col_sse2, &aom_int_pro_col_c), + make_tuple(64, 64, &aom_int_pro_col_sse2, &aom_int_pro_col_c), + make_tuple(128, 128, &aom_int_pro_col_sse2, &aom_int_pro_col_c))); +#endif + +#if HAVE_AVX2 +INSTANTIATE_TEST_SUITE_P( + AVX2, AvgTest8bpp_avg_8x8_quad, + ::testing::Values(make_tuple(16, 16, 8, 0, 16, &aom_avg_8x8_quad_avx2), + make_tuple(32, 32, 8, 16, 16, &aom_avg_8x8_quad_avx2), + make_tuple(32, 32, 8, 8, 16, &aom_avg_8x8_quad_avx2))); + +INSTANTIATE_TEST_SUITE_P( + AVX2, IntProRowTest, + ::testing::Values( + make_tuple(16, 16, &aom_int_pro_row_avx2, &aom_int_pro_row_c), + make_tuple(32, 32, &aom_int_pro_row_avx2, &aom_int_pro_row_c), + make_tuple(64, 64, &aom_int_pro_row_avx2, &aom_int_pro_row_c), + make_tuple(128, 128, &aom_int_pro_row_avx2, &aom_int_pro_row_c))); + +INSTANTIATE_TEST_SUITE_P( + AVX2, IntProColTest, + ::testing::Values( + make_tuple(16, 16, &aom_int_pro_col_avx2, &aom_int_pro_col_c), + make_tuple(32, 32, &aom_int_pro_col_avx2, &aom_int_pro_col_c), + make_tuple(64, 64, &aom_int_pro_col_avx2, &aom_int_pro_col_c), + make_tuple(128, 128, &aom_int_pro_col_avx2, &aom_int_pro_col_c))); +#endif + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P( + NEON, AverageTest8bpp, + ::testing::Values(make_tuple(16, 16, 8, 0, 8, &aom_avg_8x8_neon), + make_tuple(16, 16, 8, 5, 8, &aom_avg_8x8_neon), + make_tuple(32, 32, 8, 15, 8, &aom_avg_8x8_neon), + make_tuple(16, 16, 8, 0, 4, &aom_avg_4x4_neon), + make_tuple(16, 16, 8, 5, 4, &aom_avg_4x4_neon), + make_tuple(32, 32, 8, 15, 4, &aom_avg_4x4_neon))); +INSTANTIATE_TEST_SUITE_P( + NEON, IntProRowTest, + ::testing::Values( + make_tuple(16, 16, &aom_int_pro_row_neon, &aom_int_pro_row_c), + make_tuple(32, 32, &aom_int_pro_row_neon, &aom_int_pro_row_c), + make_tuple(64, 64, &aom_int_pro_row_neon, &aom_int_pro_row_c), + make_tuple(128, 128, &aom_int_pro_row_neon, &aom_int_pro_row_c))); + +INSTANTIATE_TEST_SUITE_P( + NEON, IntProColTest, + ::testing::Values( + make_tuple(16, 16, &aom_int_pro_col_neon, &aom_int_pro_col_c), + make_tuple(32, 32, &aom_int_pro_col_neon, &aom_int_pro_col_c), + make_tuple(64, 64, &aom_int_pro_col_neon, &aom_int_pro_col_c), + make_tuple(128, 128, &aom_int_pro_col_neon, &aom_int_pro_col_c))); + +INSTANTIATE_TEST_SUITE_P( + NEON, AvgTest8bpp_avg_8x8_quad, + ::testing::Values(make_tuple(16, 16, 8, 0, 16, &aom_avg_8x8_quad_neon), + make_tuple(32, 32, 8, 16, 16, &aom_avg_8x8_quad_neon), + make_tuple(32, 32, 8, 8, 16, &aom_avg_8x8_quad_neon))); +#endif + +#if CONFIG_AV1_HIGHBITDEPTH +INSTANTIATE_TEST_SUITE_P( + C, AverageTestHbd, + ::testing::Values(make_tuple(16, 16, 10, 1, 8, &aom_highbd_avg_8x8_c), + make_tuple(16, 16, 10, 1, 4, &aom_highbd_avg_4x4_c), + make_tuple(16, 16, 12, 1, 8, &aom_highbd_avg_8x8_c), + make_tuple(16, 16, 12, 1, 4, &aom_highbd_avg_4x4_c))); + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P( + NEON, AverageTestHbd, + ::testing::Values(make_tuple(16, 16, 10, 0, 4, &aom_highbd_avg_4x4_neon), + make_tuple(16, 16, 10, 5, 4, &aom_highbd_avg_4x4_neon), + make_tuple(32, 32, 10, 15, 4, &aom_highbd_avg_4x4_neon), + make_tuple(16, 16, 12, 0, 4, &aom_highbd_avg_4x4_neon), + make_tuple(16, 16, 12, 5, 4, &aom_highbd_avg_4x4_neon), + make_tuple(32, 32, 12, 15, 4, &aom_highbd_avg_4x4_neon), + make_tuple(16, 16, 10, 0, 8, &aom_highbd_avg_8x8_neon), + make_tuple(16, 16, 10, 5, 8, &aom_highbd_avg_8x8_neon), + make_tuple(32, 32, 10, 15, 8, &aom_highbd_avg_8x8_neon), + make_tuple(16, 16, 12, 0, 8, &aom_highbd_avg_8x8_neon), + make_tuple(16, 16, 12, 5, 8, &aom_highbd_avg_8x8_neon), + make_tuple(32, 32, 12, 15, 8, &aom_highbd_avg_8x8_neon))); +#endif // HAVE_NEON +#endif // CONFIG_AV1_HIGHBITDEPTH + +typedef int (*SatdFunc)(const tran_low_t *coeffs, int length); +typedef int (*SatdLpFunc)(const int16_t *coeffs, int length); + +template <typename SatdFuncType> +struct SatdTestParam { + SatdTestParam(int s, SatdFuncType f1, SatdFuncType f2) + : satd_size(s), func_ref(f1), func_simd(f2) {} + friend std::ostream &operator<<(std::ostream &os, + const SatdTestParam<SatdFuncType> ¶m) { + return os << "satd_size: " << param.satd_size; + } + int satd_size; + SatdFuncType func_ref; + SatdFuncType func_simd; +}; + +template <typename CoeffType, typename SatdFuncType> +class SatdTestBase + : public ::testing::Test, + public ::testing::WithParamInterface<SatdTestParam<SatdFuncType>> { + protected: + explicit SatdTestBase(const SatdTestParam<SatdFuncType> &func_param) { + satd_size_ = func_param.satd_size; + satd_func_ref_ = func_param.func_ref; + satd_func_simd_ = func_param.func_simd; + } + void SetUp() override { + rnd_.Reset(ACMRandom::DeterministicSeed()); + src_ = reinterpret_cast<CoeffType *>( + aom_memalign(32, sizeof(*src_) * satd_size_)); + ASSERT_NE(src_, nullptr); + } + void TearDown() override { aom_free(src_); } + void FillConstant(const CoeffType val) { + for (int i = 0; i < satd_size_; ++i) src_[i] = val; + } + void FillRandom() { + for (int i = 0; i < satd_size_; ++i) { + src_[i] = static_cast<int16_t>(rnd_.Rand16()); + } + } + void Check(int expected) { + int total_ref; + API_REGISTER_STATE_CHECK(total_ref = satd_func_ref_(src_, satd_size_)); + EXPECT_EQ(expected, total_ref); + + int total_simd; + API_REGISTER_STATE_CHECK(total_simd = satd_func_simd_(src_, satd_size_)); + EXPECT_EQ(expected, total_simd); + } + void RunComparison() { + int total_ref; + API_REGISTER_STATE_CHECK(total_ref = satd_func_ref_(src_, satd_size_)); + + int total_simd; + API_REGISTER_STATE_CHECK(total_simd = satd_func_simd_(src_, satd_size_)); + + EXPECT_EQ(total_ref, total_simd); + } + void RunSpeedTest() { + const int numIter = 500000; + printf("size = %d number of iteration is %d \n", satd_size_, numIter); + + int total_ref; + aom_usec_timer c_timer_; + aom_usec_timer_start(&c_timer_); + for (int i = 0; i < numIter; i++) { + total_ref = satd_func_ref_(src_, satd_size_); + } + aom_usec_timer_mark(&c_timer_); + + int total_simd; + aom_usec_timer simd_timer_; + aom_usec_timer_start(&simd_timer_); + + for (int i = 0; i < numIter; i++) { + total_simd = satd_func_simd_(src_, satd_size_); + } + aom_usec_timer_mark(&simd_timer_); + + const int c_sum_time = static_cast<int>(aom_usec_timer_elapsed(&c_timer_)); + const int simd_sum_time = + static_cast<int>(aom_usec_timer_elapsed(&simd_timer_)); + + printf( + "c_time = %d \t simd_time = %d \t Gain = %4.2f \n", c_sum_time, + simd_sum_time, + (static_cast<float>(c_sum_time) / static_cast<float>(simd_sum_time))); + + EXPECT_EQ(total_ref, total_simd) << "Output mismatch \n"; + } + int satd_size_; + + private: + CoeffType *src_; + SatdFuncType satd_func_ref_; + SatdFuncType satd_func_simd_; + ACMRandom rnd_; +}; + +class SatdTest : public SatdTestBase<tran_low_t, SatdFunc> { + public: + SatdTest() : SatdTestBase(GetParam()) {} +}; + +TEST_P(SatdTest, MinValue) { + const int kMin = -524287; + const int expected = -kMin * satd_size_; + FillConstant(kMin); + Check(expected); +} +TEST_P(SatdTest, MaxValue) { + const int kMax = 524287; + const int expected = kMax * satd_size_; + FillConstant(kMax); + Check(expected); +} +TEST_P(SatdTest, Random) { + int expected; + switch (satd_size_) { + case 16: expected = 205298; break; + case 64: expected = 1113950; break; + case 256: expected = 4268415; break; + case 1024: expected = 16954082; break; + default: + FAIL() << "Invalid satd size (" << satd_size_ + << ") valid: 16/64/256/1024"; + } + FillRandom(); + Check(expected); +} +TEST_P(SatdTest, Match) { + FillRandom(); + RunComparison(); +} +TEST_P(SatdTest, DISABLED_Speed) { + FillRandom(); + RunSpeedTest(); +} +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(SatdTest); + +INSTANTIATE_TEST_SUITE_P( + C, SatdTest, + ::testing::Values(SatdTestParam<SatdFunc>(16, &aom_satd_c, &aom_satd_c), + SatdTestParam<SatdFunc>(64, &aom_satd_c, &aom_satd_c), + SatdTestParam<SatdFunc>(256, &aom_satd_c, &aom_satd_c), + SatdTestParam<SatdFunc>(1024, &aom_satd_c, &aom_satd_c))); + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P( + NEON, SatdTest, + ::testing::Values(SatdTestParam<SatdFunc>(16, &aom_satd_c, &aom_satd_neon), + SatdTestParam<SatdFunc>(64, &aom_satd_c, &aom_satd_neon), + SatdTestParam<SatdFunc>(256, &aom_satd_c, &aom_satd_neon), + SatdTestParam<SatdFunc>(1024, &aom_satd_c, + &aom_satd_neon))); +INSTANTIATE_TEST_SUITE_P( + NEON, VectorVarTest, + ::testing::Values(make_tuple(2, &aom_vector_var_c, &aom_vector_var_neon), + make_tuple(3, &aom_vector_var_c, &aom_vector_var_neon), + make_tuple(4, &aom_vector_var_c, &aom_vector_var_neon), + make_tuple(5, &aom_vector_var_c, &aom_vector_var_neon))); +#endif + +#if HAVE_SVE +INSTANTIATE_TEST_SUITE_P( + SVE, VectorVarTest, + ::testing::Values(make_tuple(2, &aom_vector_var_c, &aom_vector_var_sve), + make_tuple(3, &aom_vector_var_c, &aom_vector_var_sve), + make_tuple(4, &aom_vector_var_c, &aom_vector_var_sve), + make_tuple(5, &aom_vector_var_c, &aom_vector_var_sve))); +#endif // HAVE_SVE + +#if HAVE_SSE4_1 +INSTANTIATE_TEST_SUITE_P( + SSE4_1, VectorVarTest, + ::testing::Values(make_tuple(2, &aom_vector_var_c, &aom_vector_var_sse4_1), + make_tuple(3, &aom_vector_var_c, &aom_vector_var_sse4_1), + make_tuple(4, &aom_vector_var_c, &aom_vector_var_sse4_1), + make_tuple(5, &aom_vector_var_c, + &aom_vector_var_sse4_1))); +#endif // HAVE_SSE4_1 + +#if HAVE_AVX2 +INSTANTIATE_TEST_SUITE_P( + AVX2, SatdTest, + ::testing::Values(SatdTestParam<SatdFunc>(16, &aom_satd_c, &aom_satd_avx2), + SatdTestParam<SatdFunc>(64, &aom_satd_c, &aom_satd_avx2), + SatdTestParam<SatdFunc>(256, &aom_satd_c, &aom_satd_avx2), + SatdTestParam<SatdFunc>(1024, &aom_satd_c, + &aom_satd_avx2))); + +INSTANTIATE_TEST_SUITE_P( + AVX2, VectorVarTest, + ::testing::Values(make_tuple(2, &aom_vector_var_c, &aom_vector_var_avx2), + make_tuple(3, &aom_vector_var_c, &aom_vector_var_avx2), + make_tuple(4, &aom_vector_var_c, &aom_vector_var_avx2), + make_tuple(5, &aom_vector_var_c, &aom_vector_var_avx2))); +#endif // HAVE_AVX2 + +#if HAVE_SSE2 +INSTANTIATE_TEST_SUITE_P( + SSE2, SatdTest, + ::testing::Values(SatdTestParam<SatdFunc>(16, &aom_satd_c, &aom_satd_sse2), + SatdTestParam<SatdFunc>(64, &aom_satd_c, &aom_satd_sse2), + SatdTestParam<SatdFunc>(256, &aom_satd_c, &aom_satd_sse2), + SatdTestParam<SatdFunc>(1024, &aom_satd_c, + &aom_satd_sse2))); +#endif + +class SatdLpTest : public SatdTestBase<int16_t, SatdLpFunc> { + public: + SatdLpTest() : SatdTestBase(GetParam()) {} +}; + +TEST_P(SatdLpTest, MinValue) { + const int kMin = -32640; + const int expected = -kMin * satd_size_; + FillConstant(kMin); + Check(expected); +} +TEST_P(SatdLpTest, MaxValue) { + const int kMax = 32640; + const int expected = kMax * satd_size_; + FillConstant(kMax); + Check(expected); +} +TEST_P(SatdLpTest, Random) { + int expected; + switch (satd_size_) { + case 16: expected = 205298; break; + case 64: expected = 1113950; break; + case 256: expected = 4268415; break; + case 1024: expected = 16954082; break; + default: + FAIL() << "Invalid satd size (" << satd_size_ + << ") valid: 16/64/256/1024"; + } + FillRandom(); + Check(expected); +} +TEST_P(SatdLpTest, Match) { + FillRandom(); + RunComparison(); +} +TEST_P(SatdLpTest, DISABLED_Speed) { + FillRandom(); + RunSpeedTest(); +} +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(SatdLpTest); + +// Add the following c test to avoid gtest uninitialized warning. +INSTANTIATE_TEST_SUITE_P( + C, SatdLpTest, + ::testing::Values( + SatdTestParam<SatdLpFunc>(16, &aom_satd_lp_c, &aom_satd_lp_c), + SatdTestParam<SatdLpFunc>(64, &aom_satd_lp_c, &aom_satd_lp_c), + SatdTestParam<SatdLpFunc>(256, &aom_satd_lp_c, &aom_satd_lp_c), + SatdTestParam<SatdLpFunc>(1024, &aom_satd_lp_c, &aom_satd_lp_c))); + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P( + NEON, SatdLpTest, + ::testing::Values( + SatdTestParam<SatdLpFunc>(16, &aom_satd_lp_c, &aom_satd_lp_neon), + SatdTestParam<SatdLpFunc>(64, &aom_satd_lp_c, &aom_satd_lp_neon), + SatdTestParam<SatdLpFunc>(256, &aom_satd_lp_c, &aom_satd_lp_neon), + SatdTestParam<SatdLpFunc>(1024, &aom_satd_lp_c, &aom_satd_lp_neon))); +#endif + +#if HAVE_AVX2 +INSTANTIATE_TEST_SUITE_P( + AVX2, SatdLpTest, + ::testing::Values( + SatdTestParam<SatdLpFunc>(16, &aom_satd_lp_c, &aom_satd_lp_avx2), + SatdTestParam<SatdLpFunc>(64, &aom_satd_lp_c, &aom_satd_lp_avx2), + SatdTestParam<SatdLpFunc>(256, &aom_satd_lp_c, &aom_satd_lp_avx2), + SatdTestParam<SatdLpFunc>(1024, &aom_satd_lp_c, &aom_satd_lp_avx2))); +#endif + +#if HAVE_SSE2 +INSTANTIATE_TEST_SUITE_P( + SSE2, SatdLpTest, + ::testing::Values( + SatdTestParam<SatdLpFunc>(16, &aom_satd_lp_c, &aom_satd_lp_sse2), + SatdTestParam<SatdLpFunc>(64, &aom_satd_lp_c, &aom_satd_lp_sse2), + SatdTestParam<SatdLpFunc>(256, &aom_satd_lp_c, &aom_satd_lp_sse2), + SatdTestParam<SatdLpFunc>(1024, &aom_satd_lp_c, &aom_satd_lp_sse2))); +#endif + +} // namespace |