/* * Copyright (c) 2016, Alliance for Open Media. All rights reserved * * This source code is subject to the terms of the BSD 2 Clause License and * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License * was not distributed with this source code in the LICENSE file, you can * obtain it at www.aomedia.org/license/software. If the Alliance for Open * Media Patent License 1.0 was not distributed with this source code in the * PATENTS file, you can obtain it at www.aomedia.org/license/patent. */ #include #include #include #include #include "third_party/googletest/src/googletest/include/gtest/gtest.h" #include "config/aom_config.h" #include "config/aom_dsp_rtcd.h" #include "test/acm_random.h" #include "test/register_state_check.h" #include "aom/aom_codec.h" #include "aom/aom_integer.h" #include "aom_mem/aom_mem.h" #include "aom_ports/aom_timer.h" #include "aom_ports/mem.h" #include "av1/common/cdef_block.h" namespace { typedef uint64_t (*MseWxH16bitFunc)(uint8_t *dst, int dstride, uint16_t *src, int sstride, int w, int h); typedef uint64_t (*Mse16xH16bitFunc)(uint8_t *dst, int dstride, uint16_t *src, int w, int h); typedef unsigned int (*VarianceMxNFunc)(const uint8_t *a, int a_stride, const uint8_t *b, int b_stride, unsigned int *sse); typedef void (*GetSseSum8x8QuadFunc)(const uint8_t *a, int a_stride, const uint8_t *b, int b_stride, uint32_t *sse8x8, int *sum8x8, unsigned int *tot_sse, int *tot_sum, uint32_t *var8x8); typedef void (*GetSseSum16x16DualFunc)(const uint8_t *a, int a_stride, const uint8_t *b, int b_stride, uint32_t *sse16x16, unsigned int *tot_sse, int *tot_sum, uint32_t *var16x16); typedef unsigned int (*SubpixVarMxNFunc)(const uint8_t *a, int a_stride, int xoffset, int yoffset, const uint8_t *b, int b_stride, unsigned int *sse); typedef unsigned int (*SubpixAvgVarMxNFunc)(const uint8_t *a, int a_stride, int xoffset, int yoffset, const uint8_t *b, int b_stride, uint32_t *sse, const uint8_t *second_pred); typedef unsigned int 
(*SumOfSquaresFunction)(const int16_t *src); typedef unsigned int (*DistWtdSubpixAvgVarMxNFunc)( const uint8_t *a, int a_stride, int xoffset, int yoffset, const uint8_t *b, int b_stride, uint32_t *sse, const uint8_t *second_pred, const DIST_WTD_COMP_PARAMS *jcp_param); #if !CONFIG_REALTIME_ONLY typedef uint32_t (*ObmcSubpelVarFunc)(const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); #endif using libaom_test::ACMRandom; // Truncate high bit depth results by downshifting (with rounding) by: // 2 * (bit_depth - 8) for sse // (bit_depth - 8) for se static void RoundHighBitDepth(int bit_depth, int64_t *se, uint64_t *sse) { switch (bit_depth) { case AOM_BITS_12: *sse = (*sse + 128) >> 8; *se = (*se + 8) >> 4; break; case AOM_BITS_10: *sse = (*sse + 8) >> 4; *se = (*se + 2) >> 2; break; case AOM_BITS_8: default: break; } } static unsigned int mb_ss_ref(const int16_t *src) { unsigned int res = 0; for (int i = 0; i < 256; ++i) { res += src[i] * src[i]; } return res; } /* Note: * Our codebase calculates the "diff" value in the variance algorithm by * (src - ref). 
*/ static uint32_t variance_ref(const uint8_t *src, const uint8_t *ref, int l2w, int l2h, int src_stride, int ref_stride, uint32_t *sse_ptr, bool use_high_bit_depth_, aom_bit_depth_t bit_depth) { int64_t se = 0; uint64_t sse = 0; const int w = 1 << l2w; const int h = 1 << l2h; for (int y = 0; y < h; y++) { for (int x = 0; x < w; x++) { int diff; if (!use_high_bit_depth_) { diff = src[y * src_stride + x] - ref[y * ref_stride + x]; se += diff; sse += diff * diff; } else { diff = CONVERT_TO_SHORTPTR(src)[y * src_stride + x] - CONVERT_TO_SHORTPTR(ref)[y * ref_stride + x]; se += diff; sse += diff * diff; } } } RoundHighBitDepth(bit_depth, &se, &sse); *sse_ptr = static_cast(sse); return static_cast(sse - ((se * se) >> (l2w + l2h))); } /* The subpel reference functions differ from the codec version in one aspect: * they calculate the bilinear factors directly instead of using a lookup table * and therefore upshift xoff and yoff by 1. Only every other calculated value * is used so the codec version shrinks the table to save space. */ static uint32_t subpel_variance_ref(const uint8_t *ref, const uint8_t *src, int l2w, int l2h, int xoff, int yoff, uint32_t *sse_ptr, bool use_high_bit_depth_, aom_bit_depth_t bit_depth) { int64_t se = 0; uint64_t sse = 0; const int w = 1 << l2w; const int h = 1 << l2h; xoff <<= 1; yoff <<= 1; for (int y = 0; y < h; y++) { for (int x = 0; x < w; x++) { // Bilinear interpolation at a 16th pel step. 
if (!use_high_bit_depth_) { const int a1 = ref[(w + 1) * (y + 0) + x + 0]; const int a2 = ref[(w + 1) * (y + 0) + x + 1]; const int b1 = ref[(w + 1) * (y + 1) + x + 0]; const int b2 = ref[(w + 1) * (y + 1) + x + 1]; const int a = a1 + (((a2 - a1) * xoff + 8) >> 4); const int b = b1 + (((b2 - b1) * xoff + 8) >> 4); const int r = a + (((b - a) * yoff + 8) >> 4); const int diff = r - src[w * y + x]; se += diff; sse += diff * diff; } else { uint16_t *ref16 = CONVERT_TO_SHORTPTR(ref); uint16_t *src16 = CONVERT_TO_SHORTPTR(src); const int a1 = ref16[(w + 1) * (y + 0) + x + 0]; const int a2 = ref16[(w + 1) * (y + 0) + x + 1]; const int b1 = ref16[(w + 1) * (y + 1) + x + 0]; const int b2 = ref16[(w + 1) * (y + 1) + x + 1]; const int a = a1 + (((a2 - a1) * xoff + 8) >> 4); const int b = b1 + (((b2 - b1) * xoff + 8) >> 4); const int r = a + (((b - a) * yoff + 8) >> 4); const int diff = r - src16[w * y + x]; se += diff; sse += diff * diff; } } } RoundHighBitDepth(bit_depth, &se, &sse); *sse_ptr = static_cast(sse); return static_cast(sse - ((se * se) >> (l2w + l2h))); } static uint32_t subpel_avg_variance_ref(const uint8_t *ref, const uint8_t *src, const uint8_t *second_pred, int l2w, int l2h, int xoff, int yoff, uint32_t *sse_ptr, bool use_high_bit_depth, aom_bit_depth_t bit_depth) { int64_t se = 0; uint64_t sse = 0; const int w = 1 << l2w; const int h = 1 << l2h; xoff <<= 1; yoff <<= 1; for (int y = 0; y < h; y++) { for (int x = 0; x < w; x++) { // bilinear interpolation at a 16th pel step if (!use_high_bit_depth) { const int a1 = ref[(w + 1) * (y + 0) + x + 0]; const int a2 = ref[(w + 1) * (y + 0) + x + 1]; const int b1 = ref[(w + 1) * (y + 1) + x + 0]; const int b2 = ref[(w + 1) * (y + 1) + x + 1]; const int a = a1 + (((a2 - a1) * xoff + 8) >> 4); const int b = b1 + (((b2 - b1) * xoff + 8) >> 4); const int r = a + (((b - a) * yoff + 8) >> 4); const int diff = ((r + second_pred[w * y + x] + 1) >> 1) - src[w * y + x]; se += diff; sse += diff * diff; } else { const uint16_t 
*ref16 = CONVERT_TO_SHORTPTR(ref); const uint16_t *src16 = CONVERT_TO_SHORTPTR(src); const uint16_t *sec16 = CONVERT_TO_SHORTPTR(second_pred); const int a1 = ref16[(w + 1) * (y + 0) + x + 0]; const int a2 = ref16[(w + 1) * (y + 0) + x + 1]; const int b1 = ref16[(w + 1) * (y + 1) + x + 0]; const int b2 = ref16[(w + 1) * (y + 1) + x + 1]; const int a = a1 + (((a2 - a1) * xoff + 8) >> 4); const int b = b1 + (((b2 - b1) * xoff + 8) >> 4); const int r = a + (((b - a) * yoff + 8) >> 4); const int diff = ((r + sec16[w * y + x] + 1) >> 1) - src16[w * y + x]; se += diff; sse += diff * diff; } } } RoundHighBitDepth(bit_depth, &se, &sse); *sse_ptr = static_cast(sse); return static_cast(sse - ((se * se) >> (l2w + l2h))); } static uint32_t dist_wtd_subpel_avg_variance_ref( const uint8_t *ref, const uint8_t *src, const uint8_t *second_pred, int l2w, int l2h, int xoff, int yoff, uint32_t *sse_ptr, bool use_high_bit_depth, aom_bit_depth_t bit_depth, DIST_WTD_COMP_PARAMS *jcp_param) { int64_t se = 0; uint64_t sse = 0; const int w = 1 << l2w; const int h = 1 << l2h; xoff <<= 1; yoff <<= 1; for (int y = 0; y < h; y++) { for (int x = 0; x < w; x++) { // bilinear interpolation at a 16th pel step if (!use_high_bit_depth) { const int a1 = ref[(w + 0) * (y + 0) + x + 0]; const int a2 = ref[(w + 0) * (y + 0) + x + 1]; const int b1 = ref[(w + 0) * (y + 1) + x + 0]; const int b2 = ref[(w + 0) * (y + 1) + x + 1]; const int a = a1 + (((a2 - a1) * xoff + 8) >> 4); const int b = b1 + (((b2 - b1) * xoff + 8) >> 4); const int r = a + (((b - a) * yoff + 8) >> 4); const int avg = ROUND_POWER_OF_TWO( r * jcp_param->fwd_offset + second_pred[w * y + x] * jcp_param->bck_offset, DIST_PRECISION_BITS); const int diff = avg - src[w * y + x]; se += diff; sse += diff * diff; } else { const uint16_t *ref16 = CONVERT_TO_SHORTPTR(ref); const uint16_t *src16 = CONVERT_TO_SHORTPTR(src); const uint16_t *sec16 = CONVERT_TO_SHORTPTR(second_pred); const int a1 = ref16[(w + 0) * (y + 0) + x + 0]; const int a2 = 
ref16[(w + 0) * (y + 0) + x + 1]; const int b1 = ref16[(w + 0) * (y + 1) + x + 0]; const int b2 = ref16[(w + 0) * (y + 1) + x + 1]; const int a = a1 + (((a2 - a1) * xoff + 8) >> 4); const int b = b1 + (((b2 - b1) * xoff + 8) >> 4); const int r = a + (((b - a) * yoff + 8) >> 4); const int avg = ROUND_POWER_OF_TWO(r * jcp_param->fwd_offset + sec16[w * y + x] * jcp_param->bck_offset, DIST_PRECISION_BITS); const int diff = avg - src16[w * y + x]; se += diff; sse += diff * diff; } } } RoundHighBitDepth(bit_depth, &se, &sse); *sse_ptr = static_cast(sse); return static_cast(sse - ((se * se) >> (l2w + l2h))); } #if !CONFIG_REALTIME_ONLY static uint32_t obmc_subpel_variance_ref(const uint8_t *pre, int l2w, int l2h, int xoff, int yoff, const int32_t *wsrc, const int32_t *mask, uint32_t *sse_ptr, bool use_high_bit_depth_, aom_bit_depth_t bit_depth) { int64_t se = 0; uint64_t sse = 0; const int w = 1 << l2w; const int h = 1 << l2h; xoff <<= 1; yoff <<= 1; for (int y = 0; y < h; y++) { for (int x = 0; x < w; x++) { // Bilinear interpolation at a 16th pel step. 
if (!use_high_bit_depth_) { const int a1 = pre[(w + 1) * (y + 0) + x + 0]; const int a2 = pre[(w + 1) * (y + 0) + x + 1]; const int b1 = pre[(w + 1) * (y + 1) + x + 0]; const int b2 = pre[(w + 1) * (y + 1) + x + 1]; const int a = a1 + (((a2 - a1) * xoff + 8) >> 4); const int b = b1 + (((b2 - b1) * xoff + 8) >> 4); const int r = a + (((b - a) * yoff + 8) >> 4); const int diff = ROUND_POWER_OF_TWO_SIGNED( wsrc[w * y + x] - r * mask[w * y + x], 12); se += diff; sse += diff * diff; } else { uint16_t *pre16 = CONVERT_TO_SHORTPTR(pre); const int a1 = pre16[(w + 1) * (y + 0) + x + 0]; const int a2 = pre16[(w + 1) * (y + 0) + x + 1]; const int b1 = pre16[(w + 1) * (y + 1) + x + 0]; const int b2 = pre16[(w + 1) * (y + 1) + x + 1]; const int a = a1 + (((a2 - a1) * xoff + 8) >> 4); const int b = b1 + (((b2 - b1) * xoff + 8) >> 4); const int r = a + (((b - a) * yoff + 8) >> 4); const int diff = ROUND_POWER_OF_TWO_SIGNED( wsrc[w * y + x] - r * mask[w * y + x], 12); se += diff; sse += diff * diff; } } } RoundHighBitDepth(bit_depth, &se, &sse); *sse_ptr = static_cast(sse); return static_cast(sse - ((se * se) >> (l2w + l2h))); } #endif //////////////////////////////////////////////////////////////////////////////// class SumOfSquaresTest : public ::testing::TestWithParam { public: SumOfSquaresTest() : func_(GetParam()) {} ~SumOfSquaresTest() override = default; protected: void ConstTest(); void RefTest(); SumOfSquaresFunction func_; ACMRandom rnd_; }; void SumOfSquaresTest::ConstTest() { int16_t mem[256]; unsigned int res; for (int v = 0; v < 256; ++v) { for (int i = 0; i < 256; ++i) { mem[i] = v; } API_REGISTER_STATE_CHECK(res = func_(mem)); EXPECT_EQ(256u * (v * v), res); } } void SumOfSquaresTest::RefTest() { int16_t mem[256]; for (int i = 0; i < 100; ++i) { for (int j = 0; j < 256; ++j) { mem[j] = rnd_.Rand8() - rnd_.Rand8(); } const unsigned int expected = mb_ss_ref(mem); unsigned int res; API_REGISTER_STATE_CHECK(res = func_(mem)); EXPECT_EQ(expected, res); } } 
//////////////////////////////////////////////////////////////////////////////// // Encapsulating struct to store the function to test along with // some testing context. // Can be used for MSE, SSE, Variance, etc. template struct TestParams { TestParams(int log2w = 0, int log2h = 0, Func function = nullptr, int bit_depth_value = 0) : log2width(log2w), log2height(log2h), func(function) { use_high_bit_depth = (bit_depth_value > 0); if (use_high_bit_depth) { bit_depth = static_cast(bit_depth_value); } else { bit_depth = AOM_BITS_8; } width = 1 << log2width; height = 1 << log2height; block_size = width * height; mask = (1u << bit_depth) - 1; } int log2width, log2height; int width, height; int block_size; Func func; aom_bit_depth_t bit_depth; bool use_high_bit_depth; uint32_t mask; }; template std::ostream &operator<<(std::ostream &os, const TestParams &p) { return os << "width/height:" << p.width << "/" << p.height << " function:" << reinterpret_cast(p.func) << " bit-depth:" << p.bit_depth; } // Main class for testing a function type template class MseWxHTestClass : public ::testing::TestWithParam > { public: void SetUp() override { params_ = this->GetParam(); rnd_.Reset(ACMRandom::DeterministicSeed()); src_ = reinterpret_cast( aom_memalign(16, block_size() * sizeof(src_))); dst_ = reinterpret_cast( aom_memalign(16, block_size() * sizeof(dst_))); ASSERT_NE(src_, nullptr); ASSERT_NE(dst_, nullptr); } void TearDown() override { aom_free(src_); aom_free(dst_); src_ = nullptr; dst_ = nullptr; } protected: void RefMatchTestMse(); void SpeedTest(); protected: ACMRandom rnd_; uint8_t *dst_; uint16_t *src_; TestParams params_; // some relay helpers int block_size() const { return params_.block_size; } int width() const { return params_.width; } int height() const { return params_.height; } int d_stride() const { return params_.width; } // stride is same as width int s_stride() const { return params_.width; } // stride is same as width }; template void 
MseWxHTestClass::SpeedTest() { aom_usec_timer ref_timer, test_timer; double elapsed_time_c = 0; double elapsed_time_simd = 0; int run_time = 10000000; int w = width(); int h = height(); int dstride = d_stride(); int sstride = s_stride(); for (int k = 0; k < block_size(); ++k) { dst_[k] = rnd_.Rand8(); src_[k] = rnd_.Rand8(); } aom_usec_timer_start(&ref_timer); for (int i = 0; i < run_time; i++) { aom_mse_wxh_16bit_c(dst_, dstride, src_, sstride, w, h); } aom_usec_timer_mark(&ref_timer); elapsed_time_c = static_cast(aom_usec_timer_elapsed(&ref_timer)); aom_usec_timer_start(&test_timer); for (int i = 0; i < run_time; i++) { params_.func(dst_, dstride, src_, sstride, w, h); } aom_usec_timer_mark(&test_timer); elapsed_time_simd = static_cast(aom_usec_timer_elapsed(&test_timer)); printf("%dx%d\tc_time=%lf \t simd_time=%lf \t gain=%lf\n", width(), height(), elapsed_time_c, elapsed_time_simd, (elapsed_time_c / elapsed_time_simd)); } template void MseWxHTestClass::RefMatchTestMse() { uint64_t mse_ref = 0; uint64_t mse_mod = 0; int w = width(); int h = height(); int dstride = d_stride(); int sstride = s_stride(); for (int i = 0; i < 10; i++) { for (int k = 0; k < block_size(); ++k) { dst_[k] = rnd_.Rand8(); src_[k] = rnd_.Rand8(); } API_REGISTER_STATE_CHECK( mse_ref = aom_mse_wxh_16bit_c(dst_, dstride, src_, sstride, w, h)); API_REGISTER_STATE_CHECK( mse_mod = params_.func(dst_, dstride, src_, sstride, w, h)); EXPECT_EQ(mse_ref, mse_mod) << "ref mse: " << mse_ref << " mod mse: " << mse_mod; } } template class Mse16xHTestClass : public ::testing::TestWithParam > { public: // Memory required to compute mse of two 8x8 and four 4x4 blocks assigned for // maximum width 16 and maximum height 8. 
int mem_size = 16 * 8; void SetUp() override { params_ = this->GetParam(); rnd_.Reset(ACMRandom::DeterministicSeed()); src_ = reinterpret_cast( aom_memalign(16, mem_size * sizeof(*src_))); dst_ = reinterpret_cast(aom_memalign(16, mem_size * sizeof(*dst_))); ASSERT_NE(src_, nullptr); ASSERT_NE(dst_, nullptr); } void TearDown() override { aom_free(src_); aom_free(dst_); src_ = nullptr; dst_ = nullptr; } uint8_t RandBool() { const uint32_t value = rnd_.Rand8(); return (value & 0x1); } protected: void RefMatchExtremeTestMse(); void RefMatchTestMse(); void SpeedTest(); protected: ACMRandom rnd_; uint8_t *dst_; uint16_t *src_; TestParams params_; // some relay helpers int width() const { return params_.width; } int height() const { return params_.height; } int d_stride() const { return params_.width; } }; template void Mse16xHTestClass::SpeedTest() { aom_usec_timer ref_timer, test_timer; double elapsed_time_c = 0.0; double elapsed_time_simd = 0.0; const int loop_count = 10000000; const int w = width(); const int h = height(); const int dstride = d_stride(); for (int k = 0; k < mem_size; ++k) { dst_[k] = rnd_.Rand8(); // Right shift by 6 is done to generate more input in range of [0,255] than // CDEF_VERY_LARGE int rnd_i10 = rnd_.Rand16() >> 6; src_[k] = (rnd_i10 < 256) ? 
rnd_i10 : CDEF_VERY_LARGE; } aom_usec_timer_start(&ref_timer); for (int i = 0; i < loop_count; i++) { aom_mse_16xh_16bit_c(dst_, dstride, src_, w, h); } aom_usec_timer_mark(&ref_timer); elapsed_time_c = static_cast(aom_usec_timer_elapsed(&ref_timer)); aom_usec_timer_start(&test_timer); for (int i = 0; i < loop_count; i++) { params_.func(dst_, dstride, src_, w, h); } aom_usec_timer_mark(&test_timer); elapsed_time_simd = static_cast(aom_usec_timer_elapsed(&test_timer)); printf("%dx%d\tc_time=%lf \t simd_time=%lf \t gain=%.31f\n", width(), height(), elapsed_time_c, elapsed_time_simd, (elapsed_time_c / elapsed_time_simd)); } template void Mse16xHTestClass::RefMatchTestMse() { uint64_t mse_ref = 0; uint64_t mse_mod = 0; const int w = width(); const int h = height(); const int dstride = d_stride(); for (int i = 0; i < 10; i++) { for (int k = 0; k < mem_size; ++k) { dst_[k] = rnd_.Rand8(); // Right shift by 6 is done to generate more input in range of [0,255] // than CDEF_VERY_LARGE int rnd_i10 = rnd_.Rand16() >> 6; src_[k] = (rnd_i10 < 256) ? rnd_i10 : CDEF_VERY_LARGE; } API_REGISTER_STATE_CHECK( mse_ref = aom_mse_16xh_16bit_c(dst_, dstride, src_, w, h)); API_REGISTER_STATE_CHECK(mse_mod = params_.func(dst_, dstride, src_, w, h)); EXPECT_EQ(mse_ref, mse_mod) << "ref mse: " << mse_ref << " mod mse: " << mse_mod; } } template void Mse16xHTestClass::RefMatchExtremeTestMse() { uint64_t mse_ref = 0; uint64_t mse_mod = 0; const int w = width(); const int h = height(); const int dstride = d_stride(); const int iter = 10; // Fill the buffers with extreme values for (int i = 0; i < iter; i++) { for (int k = 0; k < mem_size; ++k) { dst_[k] = static_cast(RandBool() ? 0 : 255); src_[k] = static_cast(RandBool() ? 
0 : CDEF_VERY_LARGE); } API_REGISTER_STATE_CHECK( mse_ref = aom_mse_16xh_16bit_c(dst_, dstride, src_, w, h)); API_REGISTER_STATE_CHECK(mse_mod = params_.func(dst_, dstride, src_, w, h)); EXPECT_EQ(mse_ref, mse_mod) << "ref mse: " << mse_ref << " mod mse: " << mse_mod; } } // Main class for testing a function type template class MainTestClass : public ::testing::TestWithParam > { public: void SetUp() override { params_ = this->GetParam(); rnd_.Reset(ACMRandom::DeterministicSeed()); const size_t unit = use_high_bit_depth() ? sizeof(uint16_t) : sizeof(uint8_t); src_ = reinterpret_cast(aom_memalign(16, block_size() * unit)); ref_ = new uint8_t[block_size() * unit]; ASSERT_NE(src_, nullptr); ASSERT_NE(ref_, nullptr); memset(src_, 0, block_size() * sizeof(src_[0])); memset(ref_, 0, block_size() * sizeof(ref_[0])); if (use_high_bit_depth()) { // TODO(skal): remove! src_ = CONVERT_TO_BYTEPTR(src_); ref_ = CONVERT_TO_BYTEPTR(ref_); } } void TearDown() override { if (use_high_bit_depth()) { // TODO(skal): remove! src_ = reinterpret_cast(CONVERT_TO_SHORTPTR(src_)); ref_ = reinterpret_cast(CONVERT_TO_SHORTPTR(ref_)); } aom_free(src_); delete[] ref_; src_ = nullptr; ref_ = nullptr; } protected: // We could sub-class MainTestClass into dedicated class for Variance // and MSE/SSE, but it involves a lot of 'this->xxx' dereferencing // to access top class fields xxx. 
That's cumbersome, so for now we'll just // implement the testing methods here: // Variance tests void ZeroTest(); void RefTest(); void RefStrideTest(); void OneQuarterTest(); void SpeedTest(); // SSE&SUM tests void RefTestSseSum(); void MinTestSseSum(); void MaxTestSseSum(); void SseSum_SpeedTest(); // SSE&SUM dual tests void RefTestSseSumDual(); void MinTestSseSumDual(); void MaxTestSseSumDual(); void SseSum_SpeedTestDual(); // MSE/SSE tests void RefTestMse(); void RefTestSse(); void MaxTestMse(); void MaxTestSse(); protected: ACMRandom rnd_; uint8_t *src_; uint8_t *ref_; TestParams params_; // some relay helpers bool use_high_bit_depth() const { return params_.use_high_bit_depth; } int byte_shift() const { return params_.bit_depth - 8; } int block_size() const { return params_.block_size; } int width() const { return params_.width; } int height() const { return params_.height; } uint32_t mask() const { return params_.mask; } }; //////////////////////////////////////////////////////////////////////////////// // Tests related to variance. 
template void MainTestClass::ZeroTest() { for (int i = 0; i <= 255; ++i) { if (!use_high_bit_depth()) { memset(src_, i, block_size()); } else { uint16_t *const src16 = CONVERT_TO_SHORTPTR(src_); for (int k = 0; k < block_size(); ++k) src16[k] = i << byte_shift(); } for (int j = 0; j <= 255; ++j) { if (!use_high_bit_depth()) { memset(ref_, j, block_size()); } else { uint16_t *const ref16 = CONVERT_TO_SHORTPTR(ref_); for (int k = 0; k < block_size(); ++k) ref16[k] = j << byte_shift(); } unsigned int sse, var; API_REGISTER_STATE_CHECK( var = params_.func(src_, width(), ref_, width(), &sse)); EXPECT_EQ(0u, var) << "src values: " << i << " ref values: " << j; } } } template void MainTestClass::RefTest() { for (int i = 0; i < 10; ++i) { for (int j = 0; j < block_size(); j++) { if (!use_high_bit_depth()) { src_[j] = rnd_.Rand8(); ref_[j] = rnd_.Rand8(); } else { CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask(); CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask(); } } unsigned int sse1, sse2, var1, var2; const int stride = width(); API_REGISTER_STATE_CHECK( var1 = params_.func(src_, stride, ref_, stride, &sse1)); var2 = variance_ref(src_, ref_, params_.log2width, params_.log2height, stride, stride, &sse2, use_high_bit_depth(), params_.bit_depth); EXPECT_EQ(sse1, sse2) << "Error at test index: " << i; EXPECT_EQ(var1, var2) << "Error at test index: " << i; } } template void MainTestClass::RefStrideTest() { for (int i = 0; i < 10; ++i) { const int ref_stride = (i & 1) * width(); const int src_stride = ((i >> 1) & 1) * width(); for (int j = 0; j < block_size(); j++) { const int ref_ind = (j / width()) * ref_stride + j % width(); const int src_ind = (j / width()) * src_stride + j % width(); if (!use_high_bit_depth()) { src_[src_ind] = rnd_.Rand8(); ref_[ref_ind] = rnd_.Rand8(); } else { CONVERT_TO_SHORTPTR(src_)[src_ind] = rnd_.Rand16() & mask(); CONVERT_TO_SHORTPTR(ref_)[ref_ind] = rnd_.Rand16() & mask(); } } unsigned int sse1, sse2; unsigned int var1, var2; 
API_REGISTER_STATE_CHECK( var1 = params_.func(src_, src_stride, ref_, ref_stride, &sse1)); var2 = variance_ref(src_, ref_, params_.log2width, params_.log2height, src_stride, ref_stride, &sse2, use_high_bit_depth(), params_.bit_depth); EXPECT_EQ(sse1, sse2) << "Error at test index: " << i; EXPECT_EQ(var1, var2) << "Error at test index: " << i; } } template void MainTestClass::OneQuarterTest() { const int half = block_size() / 2; if (!use_high_bit_depth()) { memset(src_, 255, block_size()); memset(ref_, 255, half); memset(ref_ + half, 0, half); } else { aom_memset16(CONVERT_TO_SHORTPTR(src_), 255 << byte_shift(), block_size()); aom_memset16(CONVERT_TO_SHORTPTR(ref_), 255 << byte_shift(), half); aom_memset16(CONVERT_TO_SHORTPTR(ref_) + half, 0, half); } unsigned int sse, var, expected; API_REGISTER_STATE_CHECK( var = params_.func(src_, width(), ref_, width(), &sse)); expected = block_size() * 255 * 255 / 4; EXPECT_EQ(expected, var); } template void MainTestClass::SpeedTest() { for (int j = 0; j < block_size(); j++) { if (!use_high_bit_depth()) { src_[j] = rnd_.Rand8(); ref_[j] = rnd_.Rand8(); #if CONFIG_AV1_HIGHBITDEPTH } else { CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask(); CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask(); #endif // CONFIG_AV1_HIGHBITDEPTH } } unsigned int sse; const int stride = width(); int run_time = 1000000000 / block_size(); aom_usec_timer timer; aom_usec_timer_start(&timer); for (int i = 0; i < run_time; ++i) { params_.func(src_, stride, ref_, stride, &sse); } aom_usec_timer_mark(&timer); const int elapsed_time = static_cast(aom_usec_timer_elapsed(&timer)); printf("Variance %dx%d : %d us\n", width(), height(), elapsed_time); } template void MainTestClass::RefTestSseSum() { for (int i = 0; i < 10; ++i) { for (int j = 0; j < block_size(); ++j) { src_[j] = rnd_.Rand8(); ref_[j] = rnd_.Rand8(); } unsigned int sse1[256] = { 0 }; unsigned int sse2[256] = { 0 }; unsigned int var1[256] = { 0 }; unsigned int var2[256] = { 0 }; int sum1[256] 
= { 0 }; int sum2[256] = { 0 }; unsigned int sse_tot_c = 0; unsigned int sse_tot_simd = 0; int sum_tot_c = 0; int sum_tot_simd = 0; const int stride = width(); int k = 0; for (int row = 0; row < height(); row += 8) { for (int col = 0; col < width(); col += 32) { API_REGISTER_STATE_CHECK(params_.func(src_ + stride * row + col, stride, ref_ + stride * row + col, stride, &sse1[k], &sum1[k], &sse_tot_simd, &sum_tot_simd, &var1[k])); aom_get_var_sse_sum_8x8_quad_c( src_ + stride * row + col, stride, ref_ + stride * row + col, stride, &sse2[k], &sum2[k], &sse_tot_c, &sum_tot_c, &var2[k]); k += 4; } } EXPECT_EQ(sse_tot_c, sse_tot_simd); EXPECT_EQ(sum_tot_c, sum_tot_simd); for (int p = 0; p < 256; p++) { EXPECT_EQ(sse1[p], sse2[p]); EXPECT_EQ(sum1[p], sum2[p]); EXPECT_EQ(var1[p], var2[p]); } } } template void MainTestClass::MinTestSseSum() { memset(src_, 0, block_size()); memset(ref_, 255, block_size()); unsigned int sse1[256] = { 0 }; unsigned int sse2[256] = { 0 }; unsigned int var1[256] = { 0 }; unsigned int var2[256] = { 0 }; int sum1[256] = { 0 }; int sum2[256] = { 0 }; unsigned int sse_tot_c = 0; unsigned int sse_tot_simd = 0; int sum_tot_c = 0; int sum_tot_simd = 0; const int stride = width(); int k = 0; for (int i = 0; i < height(); i += 8) { for (int j = 0; j < width(); j += 32) { API_REGISTER_STATE_CHECK(params_.func( src_ + stride * i + j, stride, ref_ + stride * i + j, stride, &sse1[k], &sum1[k], &sse_tot_simd, &sum_tot_simd, &var1[k])); aom_get_var_sse_sum_8x8_quad_c( src_ + stride * i + j, stride, ref_ + stride * i + j, stride, &sse2[k], &sum2[k], &sse_tot_c, &sum_tot_c, &var2[k]); k += 4; } } EXPECT_EQ(sse_tot_simd, sse_tot_c); EXPECT_EQ(sum_tot_simd, sum_tot_c); for (int p = 0; p < 256; p++) { EXPECT_EQ(sse1[p], sse2[p]); EXPECT_EQ(sum1[p], sum2[p]); EXPECT_EQ(var1[p], var2[p]); } } template void MainTestClass::MaxTestSseSum() { memset(src_, 255, block_size()); memset(ref_, 0, block_size()); unsigned int sse1[256] = { 0 }; unsigned int sse2[256] = { 0 }; 
unsigned int var1[256] = { 0 }; unsigned int var2[256] = { 0 }; int sum1[256] = { 0 }; int sum2[256] = { 0 }; unsigned int sse_tot_c = 0; unsigned int sse_tot_simd = 0; int sum_tot_c = 0; int sum_tot_simd = 0; const int stride = width(); int k = 0; for (int i = 0; i < height(); i += 8) { for (int j = 0; j < width(); j += 32) { API_REGISTER_STATE_CHECK(params_.func( src_ + stride * i + j, stride, ref_ + stride * i + j, stride, &sse1[k], &sum1[k], &sse_tot_simd, &sum_tot_simd, &var1[k])); aom_get_var_sse_sum_8x8_quad_c( src_ + stride * i + j, stride, ref_ + stride * i + j, stride, &sse2[k], &sum2[k], &sse_tot_c, &sum_tot_c, &var2[k]); k += 4; } } EXPECT_EQ(sse_tot_c, sse_tot_simd); EXPECT_EQ(sum_tot_c, sum_tot_simd); for (int p = 0; p < 256; p++) { EXPECT_EQ(sse1[p], sse2[p]); EXPECT_EQ(sum1[p], sum2[p]); EXPECT_EQ(var1[p], var2[p]); } } template void MainTestClass::SseSum_SpeedTest() { const int loop_count = 1000000000 / block_size(); for (int j = 0; j < block_size(); ++j) { src_[j] = rnd_.Rand8(); ref_[j] = rnd_.Rand8(); } unsigned int sse1[4] = { 0 }; unsigned int sse2[4] = { 0 }; unsigned int var1[4] = { 0 }; unsigned int var2[4] = { 0 }; int sum1[4] = { 0 }; int sum2[4] = { 0 }; unsigned int sse_tot_c = 0; unsigned int sse_tot_simd = 0; int sum_tot_c = 0; int sum_tot_simd = 0; const int stride = width(); aom_usec_timer timer; aom_usec_timer_start(&timer); for (int r = 0; r < loop_count; ++r) { for (int i = 0; i < height(); i += 8) { for (int j = 0; j < width(); j += 32) { aom_get_var_sse_sum_8x8_quad_c(src_ + stride * i + j, stride, ref_ + stride * i + j, stride, sse2, sum2, &sse_tot_c, &sum_tot_c, var2); } } } aom_usec_timer_mark(&timer); const double elapsed_time_ref = static_cast(aom_usec_timer_elapsed(&timer)); aom_usec_timer_start(&timer); for (int r = 0; r < loop_count; ++r) { for (int i = 0; i < height(); i += 8) { for (int j = 0; j < width(); j += 32) { params_.func(src_ + stride * i + j, stride, ref_ + stride * i + j, stride, sse1, sum1, &sse_tot_simd, 
&sum_tot_simd, var1); } } } aom_usec_timer_mark(&timer); const double elapsed_time_simd = static_cast(aom_usec_timer_elapsed(&timer)); printf( "aom_getvar_8x8_quad for block=%dx%d : ref_time=%lf \t simd_time=%lf \t " "gain=%lf \n", width(), height(), elapsed_time_ref, elapsed_time_simd, elapsed_time_ref / elapsed_time_simd); } template void MainTestClass::RefTestSseSumDual() { for (int iter = 0; iter < 10; ++iter) { for (int idx = 0; idx < block_size(); ++idx) { src_[idx] = rnd_.Rand8(); ref_[idx] = rnd_.Rand8(); } unsigned int sse1[64] = { 0 }; unsigned int sse2[64] = { 0 }; unsigned int var1[64] = { 0 }; unsigned int var2[64] = { 0 }; unsigned int sse_tot_c = 0; unsigned int sse_tot_simd = 0; int sum_tot_c = 0; int sum_tot_simd = 0; const int stride = width(); int k = 0; for (int row = 0; row < height(); row += 16) { for (int col = 0; col < width(); col += 32) { API_REGISTER_STATE_CHECK(params_.func( src_ + stride * row + col, stride, ref_ + stride * row + col, stride, &sse1[k], &sse_tot_simd, &sum_tot_simd, &var1[k])); aom_get_var_sse_sum_16x16_dual_c( src_ + stride * row + col, stride, ref_ + stride * row + col, stride, &sse2[k], &sse_tot_c, &sum_tot_c, &var2[k]); k += 2; } } EXPECT_EQ(sse_tot_c, sse_tot_simd); EXPECT_EQ(sum_tot_c, sum_tot_simd); for (int p = 0; p < 64; p++) { EXPECT_EQ(sse1[p], sse2[p]); EXPECT_EQ(sse_tot_simd, sse_tot_c); EXPECT_EQ(sum_tot_simd, sum_tot_c); EXPECT_EQ(var1[p], var2[p]); } } } template void MainTestClass::MinTestSseSumDual() { memset(src_, 0, block_size()); memset(ref_, 255, block_size()); unsigned int sse1[64] = { 0 }; unsigned int sse2[64] = { 0 }; unsigned int var1[64] = { 0 }; unsigned int var2[64] = { 0 }; unsigned int sse_tot_c = 0; unsigned int sse_tot_simd = 0; int sum_tot_c = 0; int sum_tot_simd = 0; const int stride = width(); int k = 0; for (int row = 0; row < height(); row += 16) { for (int col = 0; col < width(); col += 32) { API_REGISTER_STATE_CHECK(params_.func( src_ + stride * row + col, stride, ref_ + stride * 
row + col, stride, &sse1[k], &sse_tot_simd, &sum_tot_simd, &var1[k])); aom_get_var_sse_sum_16x16_dual_c( src_ + stride * row + col, stride, ref_ + stride * row + col, stride, &sse2[k], &sse_tot_c, &sum_tot_c, &var2[k]); k += 2; } } EXPECT_EQ(sse_tot_simd, sse_tot_c); EXPECT_EQ(sum_tot_simd, sum_tot_c); for (int p = 0; p < 64; p++) { EXPECT_EQ(sse1[p], sse2[p]); EXPECT_EQ(var1[p], var2[p]); } } template void MainTestClass::MaxTestSseSumDual() { memset(src_, 255, block_size()); memset(ref_, 0, block_size()); unsigned int sse1[64] = { 0 }; unsigned int sse2[64] = { 0 }; unsigned int var1[64] = { 0 }; unsigned int var2[64] = { 0 }; unsigned int sse_tot_c = 0; unsigned int sse_tot_simd = 0; int sum_tot_c = 0; int sum_tot_simd = 0; const int stride = width(); int k = 0; for (int row = 0; row < height(); row += 16) { for (int col = 0; col < width(); col += 32) { API_REGISTER_STATE_CHECK(params_.func( src_ + stride * row + col, stride, ref_ + stride * row + col, stride, &sse1[k], &sse_tot_simd, &sum_tot_simd, &var1[k])); aom_get_var_sse_sum_16x16_dual_c( src_ + stride * row + col, stride, ref_ + stride * row + col, stride, &sse2[k], &sse_tot_c, &sum_tot_c, &var2[k]); k += 2; } } EXPECT_EQ(sse_tot_c, sse_tot_simd); EXPECT_EQ(sum_tot_c, sum_tot_simd); for (int p = 0; p < 64; p++) { EXPECT_EQ(sse1[p], sse2[p]); EXPECT_EQ(var1[p], var2[p]); } } template void MainTestClass::SseSum_SpeedTestDual() { const int loop_count = 1000000000 / block_size(); for (int idx = 0; idx < block_size(); ++idx) { src_[idx] = rnd_.Rand8(); ref_[idx] = rnd_.Rand8(); } unsigned int sse1[2] = { 0 }; unsigned int sse2[2] = { 0 }; unsigned int var1[2] = { 0 }; unsigned int var2[2] = { 0 }; unsigned int sse_tot_c = 0; unsigned int sse_tot_simd = 0; int sum_tot_c = 0; int sum_tot_simd = 0; const int stride = width(); aom_usec_timer timer; aom_usec_timer_start(&timer); for (int r = 0; r < loop_count; ++r) { for (int row = 0; row < height(); row += 16) { for (int col = 0; col < width(); col += 32) { 
aom_get_var_sse_sum_16x16_dual_c(src_ + stride * row + col, stride, ref_ + stride * row + col, stride, sse2, &sse_tot_c, &sum_tot_c, var2); } } } aom_usec_timer_mark(&timer); const double elapsed_time_ref = static_cast(aom_usec_timer_elapsed(&timer)); aom_usec_timer_start(&timer); for (int r = 0; r < loop_count; ++r) { for (int row = 0; row < height(); row += 16) { for (int col = 0; col < width(); col += 32) { params_.func(src_ + stride * row + col, stride, ref_ + stride * row + col, stride, sse1, &sse_tot_simd, &sum_tot_simd, var1); } } } aom_usec_timer_mark(&timer); const double elapsed_time_simd = static_cast(aom_usec_timer_elapsed(&timer)); printf( "aom_getvar_16x16_dual for block=%dx%d : ref_time=%lf \t simd_time=%lf " "\t " "gain=%lf \n", width(), height(), elapsed_time_ref, elapsed_time_simd, elapsed_time_ref / elapsed_time_simd); } //////////////////////////////////////////////////////////////////////////////// // Tests related to MSE / SSE. template void MainTestClass::RefTestMse() { for (int i = 0; i < 10; ++i) { for (int j = 0; j < block_size(); ++j) { if (!use_high_bit_depth()) { src_[j] = rnd_.Rand8(); ref_[j] = rnd_.Rand8(); #if CONFIG_AV1_HIGHBITDEPTH } else { CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask(); CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask(); #endif // CONFIG_AV1_HIGHBITDEPTH } } unsigned int sse1, sse2; const int stride = width(); API_REGISTER_STATE_CHECK(params_.func(src_, stride, ref_, stride, &sse1)); variance_ref(src_, ref_, params_.log2width, params_.log2height, stride, stride, &sse2, use_high_bit_depth(), params_.bit_depth); EXPECT_EQ(sse1, sse2); } } template void MainTestClass::RefTestSse() { for (int i = 0; i < 10; ++i) { for (int j = 0; j < block_size(); ++j) { src_[j] = rnd_.Rand8(); ref_[j] = rnd_.Rand8(); } unsigned int sse2; unsigned int var1; const int stride = width(); API_REGISTER_STATE_CHECK(var1 = params_.func(src_, stride, ref_, stride)); variance_ref(src_, ref_, params_.log2width, params_.log2height, 
stride, stride, &sse2, false, AOM_BITS_8); EXPECT_EQ(var1, sse2); } } template void MainTestClass::MaxTestMse() { int max_value = (1 << params_.bit_depth) - 1; if (!use_high_bit_depth()) { memset(src_, max_value, block_size()); memset(ref_, 0, block_size()); #if CONFIG_AV1_HIGHBITDEPTH } else { aom_memset16(CONVERT_TO_SHORTPTR(src_), max_value, block_size()); aom_memset16(CONVERT_TO_SHORTPTR(ref_), 0, block_size()); #endif // CONFIG_AV1_HIGHBITDEPTH } unsigned int sse; API_REGISTER_STATE_CHECK(params_.func(src_, width(), ref_, width(), &sse)); unsigned int expected = (unsigned int)block_size() * max_value * max_value; switch (params_.bit_depth) { case AOM_BITS_12: expected = ROUND_POWER_OF_TWO(expected, 8); break; case AOM_BITS_10: expected = ROUND_POWER_OF_TWO(expected, 4); break; case AOM_BITS_8: default: break; } EXPECT_EQ(expected, sse); } template void MainTestClass::MaxTestSse() { memset(src_, 255, block_size()); memset(ref_, 0, block_size()); unsigned int var; API_REGISTER_STATE_CHECK(var = params_.func(src_, width(), ref_, width())); const unsigned int expected = block_size() * 255 * 255; EXPECT_EQ(expected, var); } //////////////////////////////////////////////////////////////////////////////// using std::get; using std::make_tuple; using std::tuple; template class SubpelVarianceTest : public ::testing::TestWithParam > { public: void SetUp() override { params_ = this->GetParam(); rnd_.Reset(ACMRandom::DeterministicSeed()); if (!use_high_bit_depth()) { src_ = reinterpret_cast(aom_memalign(32, block_size())); sec_ = reinterpret_cast(aom_memalign(32, block_size())); ref_ = reinterpret_cast( aom_memalign(32, block_size() + width() + height() + 1)); } else { src_ = CONVERT_TO_BYTEPTR(reinterpret_cast( aom_memalign(32, block_size() * sizeof(uint16_t)))); sec_ = CONVERT_TO_BYTEPTR(reinterpret_cast( aom_memalign(32, block_size() * sizeof(uint16_t)))); ref_ = CONVERT_TO_BYTEPTR(aom_memalign( 32, (block_size() + width() + height() + 1) * sizeof(uint16_t))); } 
ASSERT_NE(src_, nullptr); ASSERT_NE(sec_, nullptr); ASSERT_NE(ref_, nullptr); } void TearDown() override { if (!use_high_bit_depth()) { aom_free(src_); aom_free(ref_); aom_free(sec_); } else { aom_free(CONVERT_TO_SHORTPTR(src_)); aom_free(CONVERT_TO_SHORTPTR(ref_)); aom_free(CONVERT_TO_SHORTPTR(sec_)); } } protected: void RefTest(); void ExtremeRefTest(); void SpeedTest(); ACMRandom rnd_; uint8_t *src_; uint8_t *ref_; uint8_t *sec_; TestParams params_; DIST_WTD_COMP_PARAMS jcp_param_; // some relay helpers bool use_high_bit_depth() const { return params_.use_high_bit_depth; } int byte_shift() const { return params_.bit_depth - 8; } int block_size() const { return params_.block_size; } int width() const { return params_.width; } int height() const { return params_.height; } uint32_t mask() const { return params_.mask; } }; template void SubpelVarianceTest::RefTest() { for (int x = 0; x < 8; ++x) { for (int y = 0; y < 8; ++y) { if (!use_high_bit_depth()) { for (int j = 0; j < block_size(); j++) { src_[j] = rnd_.Rand8(); } for (int j = 0; j < block_size() + width() + height() + 1; j++) { ref_[j] = rnd_.Rand8(); } } else { for (int j = 0; j < block_size(); j++) { CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask(); } for (int j = 0; j < block_size() + width() + height() + 1; j++) { CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask(); } } unsigned int sse1, sse2; unsigned int var1; API_REGISTER_STATE_CHECK( var1 = params_.func(ref_, width() + 1, x, y, src_, width(), &sse1)); const unsigned int var2 = subpel_variance_ref( ref_, src_, params_.log2width, params_.log2height, x, y, &sse2, use_high_bit_depth(), params_.bit_depth); EXPECT_EQ(sse1, sse2) << "at position " << x << ", " << y; EXPECT_EQ(var1, var2) << "at position " << x << ", " << y; } } } template void SubpelVarianceTest::ExtremeRefTest() { // Compare against reference. // Src: Set the first half of values to 0, the second half to the maximum. 
// Ref: Set the first half of values to the maximum, the second half to 0. for (int x = 0; x < 8; ++x) { for (int y = 0; y < 8; ++y) { const int half = block_size() / 2; if (!use_high_bit_depth()) { memset(src_, 0, half); memset(src_ + half, 255, half); memset(ref_, 255, half); memset(ref_ + half, 0, half + width() + height() + 1); } else { aom_memset16(CONVERT_TO_SHORTPTR(src_), mask(), half); aom_memset16(CONVERT_TO_SHORTPTR(src_) + half, 0, half); aom_memset16(CONVERT_TO_SHORTPTR(ref_), 0, half); aom_memset16(CONVERT_TO_SHORTPTR(ref_) + half, mask(), half + width() + height() + 1); } unsigned int sse1, sse2; unsigned int var1; API_REGISTER_STATE_CHECK( var1 = params_.func(ref_, width() + 1, x, y, src_, width(), &sse1)); const unsigned int var2 = subpel_variance_ref( ref_, src_, params_.log2width, params_.log2height, x, y, &sse2, use_high_bit_depth(), params_.bit_depth); EXPECT_EQ(sse1, sse2) << "for xoffset " << x << " and yoffset " << y; EXPECT_EQ(var1, var2) << "for xoffset " << x << " and yoffset " << y; } } } template void SubpelVarianceTest::SpeedTest() { if (!use_high_bit_depth()) { for (int j = 0; j < block_size(); j++) { src_[j] = rnd_.Rand8(); } for (int j = 0; j < block_size() + width() + height() + 1; j++) { ref_[j] = rnd_.Rand8(); } } else { for (int j = 0; j < block_size(); j++) { CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask(); } for (int j = 0; j < block_size() + width() + height() + 1; j++) { CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask(); } } unsigned int sse1, sse2; int run_time = 1000000000 / block_size(); aom_usec_timer timer; aom_usec_timer_start(&timer); for (int i = 0; i < run_time; ++i) { int x = rnd_(8); int y = rnd_(8); params_.func(ref_, width() + 1, x, y, src_, width(), &sse1); } aom_usec_timer_mark(&timer); const int elapsed_time = static_cast(aom_usec_timer_elapsed(&timer)); aom_usec_timer timer_c; aom_usec_timer_start(&timer_c); for (int i = 0; i < run_time; ++i) { int x = rnd_(8); int y = rnd_(8); 
subpel_variance_ref(ref_, src_, params_.log2width, params_.log2height, x, y, &sse2, use_high_bit_depth(), params_.bit_depth); } aom_usec_timer_mark(&timer_c); const int elapsed_time_c = static_cast(aom_usec_timer_elapsed(&timer_c)); printf( "sub_pixel_variance_%dx%d_%d: ref_time=%d us opt_time=%d us gain=%d \n", width(), height(), params_.bit_depth, elapsed_time_c, elapsed_time, elapsed_time_c / elapsed_time); } template <> void SubpelVarianceTest::RefTest() { for (int x = 0; x < 8; ++x) { for (int y = 0; y < 8; ++y) { if (!use_high_bit_depth()) { for (int j = 0; j < block_size(); j++) { src_[j] = rnd_.Rand8(); sec_[j] = rnd_.Rand8(); } for (int j = 0; j < block_size() + width() + height() + 1; j++) { ref_[j] = rnd_.Rand8(); } } else { for (int j = 0; j < block_size(); j++) { CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask(); CONVERT_TO_SHORTPTR(sec_)[j] = rnd_.Rand16() & mask(); } for (int j = 0; j < block_size() + width() + height() + 1; j++) { CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask(); } } uint32_t sse1, sse2; uint32_t var1, var2; API_REGISTER_STATE_CHECK(var1 = params_.func(ref_, width() + 1, x, y, src_, width(), &sse1, sec_)); var2 = subpel_avg_variance_ref(ref_, src_, sec_, params_.log2width, params_.log2height, x, y, &sse2, use_high_bit_depth(), params_.bit_depth); EXPECT_EQ(sse1, sse2) << "at position " << x << ", " << y; EXPECT_EQ(var1, var2) << "at position " << x << ", " << y; } } } template <> void SubpelVarianceTest::RefTest() { for (int x = 0; x < 8; ++x) { for (int y = 0; y < 8; ++y) { if (!use_high_bit_depth()) { for (int j = 0; j < block_size(); j++) { src_[j] = rnd_.Rand8(); sec_[j] = rnd_.Rand8(); } for (int j = 0; j < block_size() + width() + height() + 1; j++) { ref_[j] = rnd_.Rand8(); } } else { for (int j = 0; j < block_size(); j++) { CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask(); CONVERT_TO_SHORTPTR(sec_)[j] = rnd_.Rand16() & mask(); } for (int j = 0; j < block_size() + width() + height() + 1; j++) { 
CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask(); } } for (int x0 = 0; x0 < 2; ++x0) { for (int y0 = 0; y0 < 4; ++y0) { uint32_t sse1, sse2; uint32_t var1, var2; jcp_param_.fwd_offset = quant_dist_lookup_table[y0][x0]; jcp_param_.bck_offset = quant_dist_lookup_table[y0][1 - x0]; API_REGISTER_STATE_CHECK(var1 = params_.func(ref_, width() + 0, x, y, src_, width(), &sse1, sec_, &jcp_param_)); var2 = dist_wtd_subpel_avg_variance_ref( ref_, src_, sec_, params_.log2width, params_.log2height, x, y, &sse2, use_high_bit_depth(), params_.bit_depth, &jcp_param_); EXPECT_EQ(sse1, sse2) << "at position " << x << ", " << y; EXPECT_EQ(var1, var2) << "at position " << x << ", " << y; } } } } } //////////////////////////////////////////////////////////////////////////////// #if !CONFIG_REALTIME_ONLY static const int kMaskMax = 64; typedef TestParams ObmcSubpelVarianceParams; template class ObmcVarianceTest : public ::testing::TestWithParam > { public: void SetUp() override { params_ = this->GetParam(); rnd_.Reset(ACMRandom::DeterministicSeed()); if (!use_high_bit_depth()) { pre_ = reinterpret_cast( aom_memalign(32, block_size() + width() + height() + 1)); } else { pre_ = CONVERT_TO_BYTEPTR(reinterpret_cast(aom_memalign( 32, (block_size() + width() + height() + 1) * sizeof(uint16_t)))); } wsrc_ = reinterpret_cast( aom_memalign(32, block_size() * sizeof(uint32_t))); mask_ = reinterpret_cast( aom_memalign(32, block_size() * sizeof(uint32_t))); ASSERT_NE(pre_, nullptr); ASSERT_NE(wsrc_, nullptr); ASSERT_NE(mask_, nullptr); } void TearDown() override { if (!use_high_bit_depth()) { aom_free(pre_); } else { aom_free(CONVERT_TO_SHORTPTR(pre_)); } aom_free(wsrc_); aom_free(mask_); } protected: void RefTest(); void ExtremeRefTest(); void SpeedTest(); ACMRandom rnd_; uint8_t *pre_; int32_t *wsrc_; int32_t *mask_; TestParams params_; // some relay helpers bool use_high_bit_depth() const { return params_.use_high_bit_depth; } int byte_shift() const { return params_.bit_depth - 8; } int 
block_size() const { return params_.block_size; } int width() const { return params_.width; } int height() const { return params_.height; } uint32_t bd_mask() const { return params_.mask; } }; template <> void ObmcVarianceTest::RefTest() { for (int x = 0; x < 8; ++x) { for (int y = 0; y < 8; ++y) { if (!use_high_bit_depth()) for (int j = 0; j < block_size() + width() + height() + 1; j++) pre_[j] = rnd_.Rand8(); else for (int j = 0; j < block_size() + width() + height() + 1; j++) CONVERT_TO_SHORTPTR(pre_)[j] = rnd_.Rand16() & bd_mask(); for (int j = 0; j < block_size(); j++) { wsrc_[j] = (rnd_.Rand16() & bd_mask()) * rnd_(kMaskMax * kMaskMax + 1); mask_[j] = rnd_(kMaskMax * kMaskMax + 1); } uint32_t sse1, sse2; uint32_t var1, var2; API_REGISTER_STATE_CHECK( var1 = params_.func(pre_, width() + 1, x, y, wsrc_, mask_, &sse1)); var2 = obmc_subpel_variance_ref( pre_, params_.log2width, params_.log2height, x, y, wsrc_, mask_, &sse2, use_high_bit_depth(), params_.bit_depth); EXPECT_EQ(sse1, sse2) << "for xoffset " << x << " and yoffset " << y; EXPECT_EQ(var1, var2) << "for xoffset " << x << " and yoffset " << y; } } } template <> void ObmcVarianceTest::ExtremeRefTest() { // Pre: Set the first half of values to the maximum, the second half to 0. // Mask: same as above // WSrc: Set the first half of values to 0, the second half to the maximum. 
for (int x = 0; x < 8; ++x) { for (int y = 0; y < 8; ++y) { const int half = block_size() / 2; if (!use_high_bit_depth()) { memset(pre_, 255, half); memset(pre_ + half, 0, half + width() + height() + 1); } else { aom_memset16(CONVERT_TO_SHORTPTR(pre_), bd_mask(), half); aom_memset16(CONVERT_TO_SHORTPTR(pre_) + half, 0, half + width() + height() + 1); } for (int j = 0; j < half; j++) { wsrc_[j] = bd_mask() * kMaskMax * kMaskMax; mask_[j] = 0; } for (int j = half; j < block_size(); j++) { wsrc_[j] = 0; mask_[j] = kMaskMax * kMaskMax; } uint32_t sse1, sse2; uint32_t var1, var2; API_REGISTER_STATE_CHECK( var1 = params_.func(pre_, width() + 1, x, y, wsrc_, mask_, &sse1)); var2 = obmc_subpel_variance_ref( pre_, params_.log2width, params_.log2height, x, y, wsrc_, mask_, &sse2, use_high_bit_depth(), params_.bit_depth); EXPECT_EQ(sse1, sse2) << "for xoffset " << x << " and yoffset " << y; EXPECT_EQ(var1, var2) << "for xoffset " << x << " and yoffset " << y; } } } template <> void ObmcVarianceTest::SpeedTest() { if (!use_high_bit_depth()) for (int j = 0; j < block_size() + width() + height() + 1; j++) pre_[j] = rnd_.Rand8(); else for (int j = 0; j < block_size() + width() + height() + 1; j++) CONVERT_TO_SHORTPTR(pre_)[j] = rnd_.Rand16() & bd_mask(); for (int j = 0; j < block_size(); j++) { wsrc_[j] = (rnd_.Rand16() & bd_mask()) * rnd_(kMaskMax * kMaskMax + 1); mask_[j] = rnd_(kMaskMax * kMaskMax + 1); } unsigned int sse1; const int stride = width() + 1; int run_time = 1000000000 / block_size(); aom_usec_timer timer; aom_usec_timer_start(&timer); for (int i = 0; i < run_time; ++i) { int x = rnd_(8); int y = rnd_(8); API_REGISTER_STATE_CHECK( params_.func(pre_, stride, x, y, wsrc_, mask_, &sse1)); } aom_usec_timer_mark(&timer); const int elapsed_time = static_cast(aom_usec_timer_elapsed(&timer)); printf("obmc_sub_pixel_variance_%dx%d_%d: %d us\n", width(), height(), params_.bit_depth, elapsed_time); } #endif // !CONFIG_REALTIME_ONLY typedef MseWxHTestClass MseWxHTest; typedef 
Mse16xHTestClass Mse16xHTest; typedef MainTestClass AvxMseTest; typedef MainTestClass AvxVarianceTest; typedef MainTestClass GetSseSum8x8QuadTest; typedef MainTestClass GetSseSum16x16DualTest; typedef SubpelVarianceTest AvxSubpelVarianceTest; typedef SubpelVarianceTest AvxSubpelAvgVarianceTest; typedef SubpelVarianceTest AvxDistWtdSubpelAvgVarianceTest; #if !CONFIG_REALTIME_ONLY typedef ObmcVarianceTest AvxObmcSubpelVarianceTest; #endif typedef TestParams MseWxHParams; typedef TestParams Mse16xHParams; TEST_P(MseWxHTest, RefMse) { RefMatchTestMse(); } TEST_P(MseWxHTest, DISABLED_SpeedMse) { SpeedTest(); } TEST_P(Mse16xHTest, RefMse) { RefMatchTestMse(); } TEST_P(Mse16xHTest, RefMseExtreme) { RefMatchExtremeTestMse(); } TEST_P(Mse16xHTest, DISABLED_SpeedMse) { SpeedTest(); } TEST_P(AvxMseTest, RefMse) { RefTestMse(); } TEST_P(AvxMseTest, MaxMse) { MaxTestMse(); } TEST_P(AvxVarianceTest, Zero) { ZeroTest(); } TEST_P(AvxVarianceTest, Ref) { RefTest(); } TEST_P(AvxVarianceTest, RefStride) { RefStrideTest(); } TEST_P(AvxVarianceTest, OneQuarter) { OneQuarterTest(); } TEST_P(AvxVarianceTest, DISABLED_Speed) { SpeedTest(); } TEST_P(GetSseSum8x8QuadTest, RefMseSum) { RefTestSseSum(); } TEST_P(GetSseSum8x8QuadTest, MinSseSum) { MinTestSseSum(); } TEST_P(GetSseSum8x8QuadTest, MaxMseSum) { MaxTestSseSum(); } TEST_P(GetSseSum8x8QuadTest, DISABLED_Speed) { SseSum_SpeedTest(); } TEST_P(GetSseSum16x16DualTest, RefMseSum) { RefTestSseSumDual(); } TEST_P(GetSseSum16x16DualTest, MinSseSum) { MinTestSseSumDual(); } TEST_P(GetSseSum16x16DualTest, MaxMseSum) { MaxTestSseSumDual(); } TEST_P(GetSseSum16x16DualTest, DISABLED_Speed) { SseSum_SpeedTestDual(); } TEST_P(SumOfSquaresTest, Const) { ConstTest(); } TEST_P(SumOfSquaresTest, Ref) { RefTest(); } TEST_P(AvxSubpelVarianceTest, Ref) { RefTest(); } TEST_P(AvxSubpelVarianceTest, ExtremeRef) { ExtremeRefTest(); } TEST_P(AvxSubpelVarianceTest, DISABLED_Speed) { SpeedTest(); } TEST_P(AvxSubpelAvgVarianceTest, Ref) { RefTest(); } 
TEST_P(AvxDistWtdSubpelAvgVarianceTest, Ref) { RefTest(); } #if !CONFIG_REALTIME_ONLY TEST_P(AvxObmcSubpelVarianceTest, Ref) { RefTest(); } TEST_P(AvxObmcSubpelVarianceTest, ExtremeRef) { ExtremeRefTest(); } TEST_P(AvxObmcSubpelVarianceTest, DISABLED_Speed) { SpeedTest(); } #endif INSTANTIATE_TEST_SUITE_P( C, MseWxHTest, ::testing::Values(MseWxHParams(3, 3, &aom_mse_wxh_16bit_c, 8), MseWxHParams(3, 2, &aom_mse_wxh_16bit_c, 8), MseWxHParams(2, 3, &aom_mse_wxh_16bit_c, 8), MseWxHParams(2, 2, &aom_mse_wxh_16bit_c, 8))); INSTANTIATE_TEST_SUITE_P( C, Mse16xHTest, ::testing::Values(Mse16xHParams(3, 3, &aom_mse_16xh_16bit_c, 8), Mse16xHParams(3, 2, &aom_mse_16xh_16bit_c, 8), Mse16xHParams(2, 3, &aom_mse_16xh_16bit_c, 8), Mse16xHParams(2, 2, &aom_mse_16xh_16bit_c, 8))); INSTANTIATE_TEST_SUITE_P(C, SumOfSquaresTest, ::testing::Values(aom_get_mb_ss_c)); typedef TestParams MseParams; INSTANTIATE_TEST_SUITE_P(C, AvxMseTest, ::testing::Values(MseParams(4, 4, &aom_mse16x16_c), MseParams(4, 3, &aom_mse16x8_c), MseParams(3, 4, &aom_mse8x16_c), MseParams(3, 3, &aom_mse8x8_c))); typedef TestParams VarianceParams; const VarianceParams kArrayVariance_c[] = { VarianceParams(7, 7, &aom_variance128x128_c), VarianceParams(7, 6, &aom_variance128x64_c), VarianceParams(6, 7, &aom_variance64x128_c), VarianceParams(6, 6, &aom_variance64x64_c), VarianceParams(6, 5, &aom_variance64x32_c), VarianceParams(5, 6, &aom_variance32x64_c), VarianceParams(5, 5, &aom_variance32x32_c), VarianceParams(5, 4, &aom_variance32x16_c), VarianceParams(4, 5, &aom_variance16x32_c), VarianceParams(4, 4, &aom_variance16x16_c), VarianceParams(4, 3, &aom_variance16x8_c), VarianceParams(3, 4, &aom_variance8x16_c), VarianceParams(3, 3, &aom_variance8x8_c), VarianceParams(3, 2, &aom_variance8x4_c), VarianceParams(2, 3, &aom_variance4x8_c), VarianceParams(2, 2, &aom_variance4x4_c), #if !CONFIG_REALTIME_ONLY VarianceParams(6, 4, &aom_variance64x16_c), VarianceParams(4, 6, &aom_variance16x64_c), VarianceParams(5, 3, 
&aom_variance32x8_c), VarianceParams(3, 5, &aom_variance8x32_c), VarianceParams(4, 2, &aom_variance16x4_c), VarianceParams(2, 4, &aom_variance4x16_c), #endif }; INSTANTIATE_TEST_SUITE_P(C, AvxVarianceTest, ::testing::ValuesIn(kArrayVariance_c)); typedef TestParams GetSseSumParams; const GetSseSumParams kArrayGetSseSum8x8Quad_c[] = { GetSseSumParams(7, 7, &aom_get_var_sse_sum_8x8_quad_c, 0), GetSseSumParams(6, 6, &aom_get_var_sse_sum_8x8_quad_c, 0), GetSseSumParams(5, 5, &aom_get_var_sse_sum_8x8_quad_c, 0), GetSseSumParams(5, 4, &aom_get_var_sse_sum_8x8_quad_c, 0) }; INSTANTIATE_TEST_SUITE_P(C, GetSseSum8x8QuadTest, ::testing::ValuesIn(kArrayGetSseSum8x8Quad_c)); typedef TestParams GetSseSumParamsDual; const GetSseSumParamsDual kArrayGetSseSum16x16Dual_c[] = { GetSseSumParamsDual(7, 7, &aom_get_var_sse_sum_16x16_dual_c, 0), GetSseSumParamsDual(6, 6, &aom_get_var_sse_sum_16x16_dual_c, 0), GetSseSumParamsDual(5, 5, &aom_get_var_sse_sum_16x16_dual_c, 0), GetSseSumParamsDual(5, 4, &aom_get_var_sse_sum_16x16_dual_c, 0) }; INSTANTIATE_TEST_SUITE_P(C, GetSseSum16x16DualTest, ::testing::ValuesIn(kArrayGetSseSum16x16Dual_c)); typedef TestParams SubpelVarianceParams; const SubpelVarianceParams kArraySubpelVariance_c[] = { SubpelVarianceParams(7, 7, &aom_sub_pixel_variance128x128_c, 0), SubpelVarianceParams(7, 6, &aom_sub_pixel_variance128x64_c, 0), SubpelVarianceParams(6, 7, &aom_sub_pixel_variance64x128_c, 0), SubpelVarianceParams(6, 6, &aom_sub_pixel_variance64x64_c, 0), SubpelVarianceParams(6, 5, &aom_sub_pixel_variance64x32_c, 0), SubpelVarianceParams(5, 6, &aom_sub_pixel_variance32x64_c, 0), SubpelVarianceParams(5, 5, &aom_sub_pixel_variance32x32_c, 0), SubpelVarianceParams(5, 4, &aom_sub_pixel_variance32x16_c, 0), SubpelVarianceParams(4, 5, &aom_sub_pixel_variance16x32_c, 0), SubpelVarianceParams(4, 4, &aom_sub_pixel_variance16x16_c, 0), SubpelVarianceParams(4, 3, &aom_sub_pixel_variance16x8_c, 0), SubpelVarianceParams(3, 4, &aom_sub_pixel_variance8x16_c, 0), 
SubpelVarianceParams(3, 3, &aom_sub_pixel_variance8x8_c, 0), SubpelVarianceParams(3, 2, &aom_sub_pixel_variance8x4_c, 0), SubpelVarianceParams(2, 3, &aom_sub_pixel_variance4x8_c, 0), SubpelVarianceParams(2, 2, &aom_sub_pixel_variance4x4_c, 0), #if !CONFIG_REALTIME_ONLY SubpelVarianceParams(6, 4, &aom_sub_pixel_variance64x16_c, 0), SubpelVarianceParams(4, 6, &aom_sub_pixel_variance16x64_c, 0), SubpelVarianceParams(5, 3, &aom_sub_pixel_variance32x8_c, 0), SubpelVarianceParams(3, 5, &aom_sub_pixel_variance8x32_c, 0), SubpelVarianceParams(4, 2, &aom_sub_pixel_variance16x4_c, 0), SubpelVarianceParams(2, 4, &aom_sub_pixel_variance4x16_c, 0), #endif }; INSTANTIATE_TEST_SUITE_P(C, AvxSubpelVarianceTest, ::testing::ValuesIn(kArraySubpelVariance_c)); typedef TestParams SubpelAvgVarianceParams; const SubpelAvgVarianceParams kArraySubpelAvgVariance_c[] = { SubpelAvgVarianceParams(7, 7, &aom_sub_pixel_avg_variance128x128_c, 0), SubpelAvgVarianceParams(7, 6, &aom_sub_pixel_avg_variance128x64_c, 0), SubpelAvgVarianceParams(6, 7, &aom_sub_pixel_avg_variance64x128_c, 0), SubpelAvgVarianceParams(6, 6, &aom_sub_pixel_avg_variance64x64_c, 0), SubpelAvgVarianceParams(6, 5, &aom_sub_pixel_avg_variance64x32_c, 0), SubpelAvgVarianceParams(5, 6, &aom_sub_pixel_avg_variance32x64_c, 0), SubpelAvgVarianceParams(5, 5, &aom_sub_pixel_avg_variance32x32_c, 0), SubpelAvgVarianceParams(5, 4, &aom_sub_pixel_avg_variance32x16_c, 0), SubpelAvgVarianceParams(4, 5, &aom_sub_pixel_avg_variance16x32_c, 0), SubpelAvgVarianceParams(4, 4, &aom_sub_pixel_avg_variance16x16_c, 0), SubpelAvgVarianceParams(4, 3, &aom_sub_pixel_avg_variance16x8_c, 0), SubpelAvgVarianceParams(3, 4, &aom_sub_pixel_avg_variance8x16_c, 0), SubpelAvgVarianceParams(3, 3, &aom_sub_pixel_avg_variance8x8_c, 0), SubpelAvgVarianceParams(3, 2, &aom_sub_pixel_avg_variance8x4_c, 0), SubpelAvgVarianceParams(2, 3, &aom_sub_pixel_avg_variance4x8_c, 0), SubpelAvgVarianceParams(2, 2, &aom_sub_pixel_avg_variance4x4_c, 0), #if !CONFIG_REALTIME_ONLY 
SubpelAvgVarianceParams(6, 4, &aom_sub_pixel_avg_variance64x16_c, 0), SubpelAvgVarianceParams(4, 6, &aom_sub_pixel_avg_variance16x64_c, 0), SubpelAvgVarianceParams(5, 3, &aom_sub_pixel_avg_variance32x8_c, 0), SubpelAvgVarianceParams(3, 5, &aom_sub_pixel_avg_variance8x32_c, 0), SubpelAvgVarianceParams(4, 2, &aom_sub_pixel_avg_variance16x4_c, 0), SubpelAvgVarianceParams(2, 4, &aom_sub_pixel_avg_variance4x16_c, 0), #endif }; INSTANTIATE_TEST_SUITE_P(C, AvxSubpelAvgVarianceTest, ::testing::ValuesIn(kArraySubpelAvgVariance_c)); typedef TestParams DistWtdSubpelAvgVarianceParams; const DistWtdSubpelAvgVarianceParams kArrayDistWtdSubpelAvgVariance_c[] = { DistWtdSubpelAvgVarianceParams( 6, 6, &aom_dist_wtd_sub_pixel_avg_variance64x64_c, 0), DistWtdSubpelAvgVarianceParams( 6, 5, &aom_dist_wtd_sub_pixel_avg_variance64x32_c, 0), DistWtdSubpelAvgVarianceParams( 5, 6, &aom_dist_wtd_sub_pixel_avg_variance32x64_c, 0), DistWtdSubpelAvgVarianceParams( 5, 5, &aom_dist_wtd_sub_pixel_avg_variance32x32_c, 0), DistWtdSubpelAvgVarianceParams( 5, 4, &aom_dist_wtd_sub_pixel_avg_variance32x16_c, 0), DistWtdSubpelAvgVarianceParams( 4, 5, &aom_dist_wtd_sub_pixel_avg_variance16x32_c, 0), DistWtdSubpelAvgVarianceParams( 4, 4, &aom_dist_wtd_sub_pixel_avg_variance16x16_c, 0), DistWtdSubpelAvgVarianceParams(4, 3, &aom_dist_wtd_sub_pixel_avg_variance16x8_c, 0), DistWtdSubpelAvgVarianceParams(3, 4, &aom_dist_wtd_sub_pixel_avg_variance8x16_c, 0), DistWtdSubpelAvgVarianceParams(3, 3, &aom_dist_wtd_sub_pixel_avg_variance8x8_c, 0), DistWtdSubpelAvgVarianceParams(3, 2, &aom_dist_wtd_sub_pixel_avg_variance8x4_c, 0), DistWtdSubpelAvgVarianceParams(2, 3, &aom_dist_wtd_sub_pixel_avg_variance4x8_c, 0), DistWtdSubpelAvgVarianceParams(2, 2, &aom_dist_wtd_sub_pixel_avg_variance4x4_c, 0), #if !CONFIG_REALTIME_ONLY DistWtdSubpelAvgVarianceParams( 6, 4, &aom_dist_wtd_sub_pixel_avg_variance64x16_c, 0), DistWtdSubpelAvgVarianceParams( 4, 6, &aom_dist_wtd_sub_pixel_avg_variance16x64_c, 0), 
DistWtdSubpelAvgVarianceParams(5, 3, &aom_dist_wtd_sub_pixel_avg_variance32x8_c, 0), DistWtdSubpelAvgVarianceParams(3, 5, &aom_dist_wtd_sub_pixel_avg_variance8x32_c, 0), DistWtdSubpelAvgVarianceParams(4, 2, &aom_dist_wtd_sub_pixel_avg_variance16x4_c, 0), DistWtdSubpelAvgVarianceParams(2, 4, &aom_dist_wtd_sub_pixel_avg_variance4x16_c, 0), #endif }; INSTANTIATE_TEST_SUITE_P(C, AvxDistWtdSubpelAvgVarianceTest, ::testing::ValuesIn(kArrayDistWtdSubpelAvgVariance_c)); #if !CONFIG_REALTIME_ONLY INSTANTIATE_TEST_SUITE_P( C, AvxObmcSubpelVarianceTest, ::testing::Values( ObmcSubpelVarianceParams(7, 7, &aom_obmc_sub_pixel_variance128x128_c, 0), ObmcSubpelVarianceParams(7, 6, &aom_obmc_sub_pixel_variance128x64_c, 0), ObmcSubpelVarianceParams(6, 7, &aom_obmc_sub_pixel_variance64x128_c, 0), ObmcSubpelVarianceParams(6, 6, &aom_obmc_sub_pixel_variance64x64_c, 0), ObmcSubpelVarianceParams(6, 5, &aom_obmc_sub_pixel_variance64x32_c, 0), ObmcSubpelVarianceParams(5, 6, &aom_obmc_sub_pixel_variance32x64_c, 0), ObmcSubpelVarianceParams(5, 5, &aom_obmc_sub_pixel_variance32x32_c, 0), ObmcSubpelVarianceParams(5, 4, &aom_obmc_sub_pixel_variance32x16_c, 0), ObmcSubpelVarianceParams(4, 5, &aom_obmc_sub_pixel_variance16x32_c, 0), ObmcSubpelVarianceParams(4, 4, &aom_obmc_sub_pixel_variance16x16_c, 0), ObmcSubpelVarianceParams(4, 3, &aom_obmc_sub_pixel_variance16x8_c, 0), ObmcSubpelVarianceParams(3, 4, &aom_obmc_sub_pixel_variance8x16_c, 0), ObmcSubpelVarianceParams(3, 3, &aom_obmc_sub_pixel_variance8x8_c, 0), ObmcSubpelVarianceParams(3, 2, &aom_obmc_sub_pixel_variance8x4_c, 0), ObmcSubpelVarianceParams(2, 3, &aom_obmc_sub_pixel_variance4x8_c, 0), ObmcSubpelVarianceParams(2, 2, &aom_obmc_sub_pixel_variance4x4_c, 0), ObmcSubpelVarianceParams(6, 4, &aom_obmc_sub_pixel_variance64x16_c, 0), ObmcSubpelVarianceParams(4, 6, &aom_obmc_sub_pixel_variance16x64_c, 0), ObmcSubpelVarianceParams(5, 3, &aom_obmc_sub_pixel_variance32x8_c, 0), ObmcSubpelVarianceParams(3, 5, &aom_obmc_sub_pixel_variance8x32_c, 0), 
ObmcSubpelVarianceParams(4, 2, &aom_obmc_sub_pixel_variance16x4_c, 0), ObmcSubpelVarianceParams(2, 4, &aom_obmc_sub_pixel_variance4x16_c, 0))); #endif #if CONFIG_AV1_HIGHBITDEPTH typedef uint64_t (*MseHBDWxH16bitFunc)(uint16_t *dst, int dstride, uint16_t *src, int sstride, int w, int h); template class MseHBDWxHTestClass : public ::testing::TestWithParam > { public: void SetUp() override { params_ = this->GetParam(); rnd_.Reset(ACMRandom::DeterministicSeed()); src_ = reinterpret_cast( aom_memalign(16, block_size() * sizeof(src_))); dst_ = reinterpret_cast( aom_memalign(16, block_size() * sizeof(dst_))); ASSERT_NE(src_, nullptr); ASSERT_NE(dst_, nullptr); } void TearDown() override { aom_free(src_); aom_free(dst_); src_ = nullptr; dst_ = nullptr; } protected: void RefMatchTestMse(); void SpeedTest(); protected: ACMRandom rnd_; uint16_t *dst_; uint16_t *src_; TestParams params_; // some relay helpers int block_size() const { return params_.block_size; } int width() const { return params_.width; } int d_stride() const { return params_.width; } // stride is same as width int s_stride() const { return params_.width; } // stride is same as width int height() const { return params_.height; } int mask() const { return params_.mask; } }; template void MseHBDWxHTestClass::SpeedTest() { aom_usec_timer ref_timer, test_timer; double elapsed_time_c = 0; double elapsed_time_simd = 0; int run_time = 10000000; int w = width(); int h = height(); int dstride = d_stride(); int sstride = s_stride(); for (int k = 0; k < block_size(); ++k) { dst_[k] = rnd_.Rand16() & mask(); src_[k] = rnd_.Rand16() & mask(); } aom_usec_timer_start(&ref_timer); for (int i = 0; i < run_time; i++) { aom_mse_wxh_16bit_highbd_c(dst_, dstride, src_, sstride, w, h); } aom_usec_timer_mark(&ref_timer); elapsed_time_c = static_cast(aom_usec_timer_elapsed(&ref_timer)); aom_usec_timer_start(&test_timer); for (int i = 0; i < run_time; i++) { params_.func(dst_, dstride, src_, sstride, w, h); } 
aom_usec_timer_mark(&test_timer); elapsed_time_simd = static_cast(aom_usec_timer_elapsed(&test_timer)); printf("%dx%d\tc_time=%lf \t simd_time=%lf \t gain=%lf\n", width(), height(), elapsed_time_c, elapsed_time_simd, (elapsed_time_c / elapsed_time_simd)); } template void MseHBDWxHTestClass::RefMatchTestMse() { uint64_t mse_ref = 0; uint64_t mse_mod = 0; int w = width(); int h = height(); int dstride = d_stride(); int sstride = s_stride(); for (int i = 0; i < 10; i++) { for (int k = 0; k < block_size(); ++k) { dst_[k] = rnd_.Rand16() & mask(); src_[k] = rnd_.Rand16() & mask(); } API_REGISTER_STATE_CHECK(mse_ref = aom_mse_wxh_16bit_highbd_c( dst_, dstride, src_, sstride, w, h)); API_REGISTER_STATE_CHECK( mse_mod = params_.func(dst_, dstride, src_, sstride, w, h)); EXPECT_EQ(mse_ref, mse_mod) << "ref mse: " << mse_ref << " mod mse: " << mse_mod; } } typedef TestParams MseHBDWxHParams; typedef MseHBDWxHTestClass MseHBDWxHTest; typedef MainTestClass AvxHBDMseTest; GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AvxHBDMseTest); typedef MainTestClass AvxHBDVarianceTest; typedef SubpelVarianceTest AvxHBDSubpelVarianceTest; typedef SubpelVarianceTest AvxHBDSubpelAvgVarianceTest; typedef SubpelVarianceTest AvxHBDDistWtdSubpelAvgVarianceTest; #if !CONFIG_REALTIME_ONLY typedef ObmcVarianceTest AvxHBDObmcSubpelVarianceTest; #endif GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AvxHBDObmcSubpelVarianceTest); TEST_P(MseHBDWxHTest, RefMse) { RefMatchTestMse(); } TEST_P(MseHBDWxHTest, DISABLED_SpeedMse) { SpeedTest(); } TEST_P(AvxHBDMseTest, RefMse) { RefTestMse(); } TEST_P(AvxHBDMseTest, MaxMse) { MaxTestMse(); } TEST_P(AvxHBDMseTest, DISABLED_SpeedMse) { SpeedTest(); } TEST_P(AvxHBDVarianceTest, Zero) { ZeroTest(); } TEST_P(AvxHBDVarianceTest, Ref) { RefTest(); } TEST_P(AvxHBDVarianceTest, RefStride) { RefStrideTest(); } TEST_P(AvxHBDVarianceTest, OneQuarter) { OneQuarterTest(); } TEST_P(AvxHBDVarianceTest, DISABLED_Speed) { SpeedTest(); } TEST_P(AvxHBDSubpelVarianceTest, Ref) { 
RefTest(); } TEST_P(AvxHBDSubpelVarianceTest, ExtremeRef) { ExtremeRefTest(); } TEST_P(AvxHBDSubpelVarianceTest, DISABLED_Speed) { SpeedTest(); } TEST_P(AvxHBDSubpelAvgVarianceTest, Ref) { RefTest(); } TEST_P(AvxHBDDistWtdSubpelAvgVarianceTest, Ref) { RefTest(); } #if !CONFIG_REALTIME_ONLY /* The high-bit-depth OBMC test cases exist only when CONFIG_REALTIME_ONLY is 0. */ TEST_P(AvxHBDObmcSubpelVarianceTest, Ref) { RefTest(); } TEST_P(AvxHBDObmcSubpelVarianceTest, ExtremeRef) { ExtremeRefTest(); } TEST_P(AvxHBDObmcSubpelVarianceTest, DISABLED_Speed) { SpeedTest(); } #endif /* "C" instantiation of MseHBDWxHTest: the function under test is aom_mse_wxh_16bit_highbd_c itself; all four entries pass 10 as the last argument. */ INSTANTIATE_TEST_SUITE_P( C, MseHBDWxHTest, ::testing::Values(MseHBDWxHParams(3, 3, &aom_mse_wxh_16bit_highbd_c, 10), MseHBDWxHParams(3, 2, &aom_mse_wxh_16bit_highbd_c, 10), MseHBDWxHParams(2, 3, &aom_mse_wxh_16bit_highbd_c, 10), MseHBDWxHParams(2, 2, &aom_mse_wxh_16bit_highbd_c, 10))); /* High-bit-depth MSE, C functions. The first two arguments equal log2 of the width/height in the function name and the last argument equals the bit depth in the name (12, 10, 8). */ INSTANTIATE_TEST_SUITE_P( C, AvxHBDMseTest, ::testing::Values(MseParams(4, 4, &aom_highbd_12_mse16x16_c, 12), MseParams(4, 3, &aom_highbd_12_mse16x8_c, 12), MseParams(3, 4, &aom_highbd_12_mse8x16_c, 12), MseParams(3, 3, &aom_highbd_12_mse8x8_c, 12), MseParams(4, 4, &aom_highbd_10_mse16x16_c, 10), MseParams(4, 3, &aom_highbd_10_mse16x8_c, 10), MseParams(3, 4, &aom_highbd_10_mse8x16_c, 10), MseParams(3, 3, &aom_highbd_10_mse8x8_c, 10), MseParams(4, 4, &aom_highbd_8_mse16x16_c, 8), MseParams(4, 3, &aom_highbd_8_mse16x8_c, 8), MseParams(3, 4, &aom_highbd_8_mse8x16_c, 8), MseParams(3, 3, &aom_highbd_8_mse8x8_c, 8))); #if HAVE_NEON /* NEON versions of the same two suites, compiled only when HAVE_NEON is set. */ INSTANTIATE_TEST_SUITE_P( NEON, MseHBDWxHTest, ::testing::Values(MseHBDWxHParams(3, 3, &aom_mse_wxh_16bit_highbd_neon, 10), MseHBDWxHParams(3, 2, &aom_mse_wxh_16bit_highbd_neon, 10), MseHBDWxHParams(2, 3, &aom_mse_wxh_16bit_highbd_neon, 10), MseHBDWxHParams(2, 2, &aom_mse_wxh_16bit_highbd_neon, 10))); INSTANTIATE_TEST_SUITE_P( NEON, AvxHBDMseTest, ::testing::Values(MseParams(4, 4, &aom_highbd_12_mse16x16_neon, 12), MseParams(4, 3, &aom_highbd_12_mse16x8_neon, 12), MseParams(3, 4, &aom_highbd_12_mse8x16_neon, 12), MseParams(3, 3, &aom_highbd_12_mse8x8_neon, 12), MseParams(4, 
4, &aom_highbd_10_mse16x16_neon, 10), MseParams(4, 3, &aom_highbd_10_mse16x8_neon, 10), MseParams(3, 4, &aom_highbd_10_mse8x16_neon, 10), MseParams(3, 3, &aom_highbd_10_mse8x8_neon, 10), MseParams(4, 4, &aom_highbd_8_mse16x16_neon, 8), MseParams(4, 3, &aom_highbd_8_mse16x8_neon, 8), MseParams(3, 4, &aom_highbd_8_mse8x16_neon, 8), MseParams(3, 3, &aom_highbd_8_mse8x8_neon, 8))); #endif // HAVE_NEON #if HAVE_NEON_DOTPROD /* NEON dot-product variants: only the 8-bit AvxHBDMseTest entries are listed. */ INSTANTIATE_TEST_SUITE_P( NEON_DOTPROD, AvxHBDMseTest, ::testing::Values(MseParams(4, 4, &aom_highbd_8_mse16x16_neon_dotprod, 8), MseParams(4, 3, &aom_highbd_8_mse16x8_neon_dotprod, 8), MseParams(3, 4, &aom_highbd_8_mse8x16_neon_dotprod, 8), MseParams(3, 3, &aom_highbd_8_mse8x8_neon_dotprod, 8))); #endif // HAVE_NEON_DOTPROD #if HAVE_SVE /* SVE variants: MseHBDWxHTest plus the 12- and 10-bit AvxHBDMseTest entries (no 8-bit entries are listed here). */ INSTANTIATE_TEST_SUITE_P( SVE, MseHBDWxHTest, ::testing::Values(MseHBDWxHParams(3, 3, &aom_mse_wxh_16bit_highbd_sve, 10), MseHBDWxHParams(3, 2, &aom_mse_wxh_16bit_highbd_sve, 10), MseHBDWxHParams(2, 3, &aom_mse_wxh_16bit_highbd_sve, 10), MseHBDWxHParams(2, 2, &aom_mse_wxh_16bit_highbd_sve, 10))); INSTANTIATE_TEST_SUITE_P( SVE, AvxHBDMseTest, ::testing::Values(MseParams(4, 4, &aom_highbd_12_mse16x16_sve, 12), MseParams(4, 3, &aom_highbd_12_mse16x8_sve, 12), MseParams(3, 4, &aom_highbd_12_mse8x16_sve, 12), MseParams(3, 3, &aom_highbd_12_mse8x8_sve, 12), MseParams(4, 4, &aom_highbd_10_mse16x16_sve, 10), MseParams(4, 3, &aom_highbd_10_mse16x8_sve, 10), MseParams(3, 4, &aom_highbd_10_mse8x16_sve, 10), MseParams(3, 3, &aom_highbd_10_mse8x8_sve, 10))); #endif // HAVE_SVE /* High-bit-depth variance, C functions, listed as 12-bit, then 10-bit, then 8-bit. The first two arguments equal log2 of the width/height in the function name; the last equals the bit depth in the name. */ const VarianceParams kArrayHBDVariance_c[] = { VarianceParams(7, 7, &aom_highbd_12_variance128x128_c, 12), VarianceParams(7, 6, &aom_highbd_12_variance128x64_c, 12), VarianceParams(6, 7, &aom_highbd_12_variance64x128_c, 12), VarianceParams(6, 6, &aom_highbd_12_variance64x64_c, 12), VarianceParams(6, 5, &aom_highbd_12_variance64x32_c, 12), VarianceParams(5, 6, &aom_highbd_12_variance32x64_c, 12), VarianceParams(5, 5, &aom_highbd_12_variance32x32_c, 12), 
VarianceParams(5, 4, &aom_highbd_12_variance32x16_c, 12), VarianceParams(4, 5, &aom_highbd_12_variance16x32_c, 12), VarianceParams(4, 4, &aom_highbd_12_variance16x16_c, 12), VarianceParams(4, 3, &aom_highbd_12_variance16x8_c, 12), VarianceParams(3, 4, &aom_highbd_12_variance8x16_c, 12), VarianceParams(3, 3, &aom_highbd_12_variance8x8_c, 12), VarianceParams(3, 2, &aom_highbd_12_variance8x4_c, 12), VarianceParams(2, 3, &aom_highbd_12_variance4x8_c, 12), VarianceParams(2, 2, &aom_highbd_12_variance4x4_c, 12), /* 10-bit entries */ VarianceParams(7, 7, &aom_highbd_10_variance128x128_c, 10), VarianceParams(7, 6, &aom_highbd_10_variance128x64_c, 10), VarianceParams(6, 7, &aom_highbd_10_variance64x128_c, 10), VarianceParams(6, 6, &aom_highbd_10_variance64x64_c, 10), VarianceParams(6, 5, &aom_highbd_10_variance64x32_c, 10), VarianceParams(5, 6, &aom_highbd_10_variance32x64_c, 10), VarianceParams(5, 5, &aom_highbd_10_variance32x32_c, 10), VarianceParams(5, 4, &aom_highbd_10_variance32x16_c, 10), VarianceParams(4, 5, &aom_highbd_10_variance16x32_c, 10), VarianceParams(4, 4, &aom_highbd_10_variance16x16_c, 10), VarianceParams(4, 3, &aom_highbd_10_variance16x8_c, 10), VarianceParams(3, 4, &aom_highbd_10_variance8x16_c, 10), VarianceParams(3, 3, &aom_highbd_10_variance8x8_c, 10), VarianceParams(3, 2, &aom_highbd_10_variance8x4_c, 10), VarianceParams(2, 3, &aom_highbd_10_variance4x8_c, 10), VarianceParams(2, 2, &aom_highbd_10_variance4x4_c, 10), /* 8-bit entries */ VarianceParams(7, 7, &aom_highbd_8_variance128x128_c, 8), VarianceParams(7, 6, &aom_highbd_8_variance128x64_c, 8), VarianceParams(6, 7, &aom_highbd_8_variance64x128_c, 8), VarianceParams(6, 6, &aom_highbd_8_variance64x64_c, 8), VarianceParams(6, 5, &aom_highbd_8_variance64x32_c, 8), VarianceParams(5, 6, &aom_highbd_8_variance32x64_c, 8), VarianceParams(5, 5, &aom_highbd_8_variance32x32_c, 8), VarianceParams(5, 4, &aom_highbd_8_variance32x16_c, 8), VarianceParams(4, 5, &aom_highbd_8_variance16x32_c, 8), VarianceParams(4, 4, &aom_highbd_8_variance16x16_c, 8), 
VarianceParams(4, 3, &aom_highbd_8_variance16x8_c, 8), VarianceParams(3, 4, &aom_highbd_8_variance8x16_c, 8), VarianceParams(3, 3, &aom_highbd_8_variance8x8_c, 8), VarianceParams(3, 2, &aom_highbd_8_variance8x4_c, 8), VarianceParams(2, 3, &aom_highbd_8_variance4x8_c, 8), VarianceParams(2, 2, &aom_highbd_8_variance4x4_c, 8), #if !CONFIG_REALTIME_ONLY /* 4:1 and 1:4 block sizes for all three bit depths; left out of realtime-only builds. */ VarianceParams(6, 4, &aom_highbd_12_variance64x16_c, 12), VarianceParams(4, 6, &aom_highbd_12_variance16x64_c, 12), VarianceParams(5, 3, &aom_highbd_12_variance32x8_c, 12), VarianceParams(3, 5, &aom_highbd_12_variance8x32_c, 12), VarianceParams(4, 2, &aom_highbd_12_variance16x4_c, 12), VarianceParams(2, 4, &aom_highbd_12_variance4x16_c, 12), VarianceParams(6, 4, &aom_highbd_10_variance64x16_c, 10), VarianceParams(4, 6, &aom_highbd_10_variance16x64_c, 10), VarianceParams(5, 3, &aom_highbd_10_variance32x8_c, 10), VarianceParams(3, 5, &aom_highbd_10_variance8x32_c, 10), VarianceParams(4, 2, &aom_highbd_10_variance16x4_c, 10), VarianceParams(2, 4, &aom_highbd_10_variance4x16_c, 10), VarianceParams(6, 4, &aom_highbd_8_variance64x16_c, 8), VarianceParams(4, 6, &aom_highbd_8_variance16x64_c, 8), VarianceParams(5, 3, &aom_highbd_8_variance32x8_c, 8), VarianceParams(3, 5, &aom_highbd_8_variance8x32_c, 8), VarianceParams(4, 2, &aom_highbd_8_variance16x4_c, 8), VarianceParams(2, 4, &aom_highbd_8_variance4x16_c, 8), #endif }; /* Registers every entry of kArrayHBDVariance_c with AvxHBDVarianceTest under the "C" prefix. */ INSTANTIATE_TEST_SUITE_P(C, AvxHBDVarianceTest, ::testing::ValuesIn(kArrayHBDVariance_c)); #if HAVE_SSE4_1 /* Only the 4x4 kernels are registered for SSE4.1, one per bit depth. */ INSTANTIATE_TEST_SUITE_P( SSE4_1, AvxHBDVarianceTest, ::testing::Values( VarianceParams(2, 2, &aom_highbd_8_variance4x4_sse4_1, 8), VarianceParams(2, 2, &aom_highbd_10_variance4x4_sse4_1, 10), VarianceParams(2, 2, &aom_highbd_12_variance4x4_sse4_1, 12))); #endif // HAVE_SSE4_1 /* High-bit-depth sub-pixel variance, C functions, listed as 8-bit, then 10-bit, then 12-bit. */ const SubpelVarianceParams kArrayHBDSubpelVariance_c[] = { SubpelVarianceParams(7, 7, &aom_highbd_8_sub_pixel_variance128x128_c, 8), SubpelVarianceParams(7, 6, &aom_highbd_8_sub_pixel_variance128x64_c, 8), SubpelVarianceParams(6, 7, 
&aom_highbd_8_sub_pixel_variance64x128_c, 8), SubpelVarianceParams(6, 6, &aom_highbd_8_sub_pixel_variance64x64_c, 8), SubpelVarianceParams(6, 5, &aom_highbd_8_sub_pixel_variance64x32_c, 8), SubpelVarianceParams(5, 6, &aom_highbd_8_sub_pixel_variance32x64_c, 8), SubpelVarianceParams(5, 5, &aom_highbd_8_sub_pixel_variance32x32_c, 8), SubpelVarianceParams(5, 4, &aom_highbd_8_sub_pixel_variance32x16_c, 8), SubpelVarianceParams(4, 5, &aom_highbd_8_sub_pixel_variance16x32_c, 8), SubpelVarianceParams(4, 4, &aom_highbd_8_sub_pixel_variance16x16_c, 8), SubpelVarianceParams(4, 3, &aom_highbd_8_sub_pixel_variance16x8_c, 8), SubpelVarianceParams(3, 4, &aom_highbd_8_sub_pixel_variance8x16_c, 8), SubpelVarianceParams(3, 3, &aom_highbd_8_sub_pixel_variance8x8_c, 8), SubpelVarianceParams(3, 2, &aom_highbd_8_sub_pixel_variance8x4_c, 8), SubpelVarianceParams(2, 3, &aom_highbd_8_sub_pixel_variance4x8_c, 8), SubpelVarianceParams(2, 2, &aom_highbd_8_sub_pixel_variance4x4_c, 8), /* 10-bit entries */ SubpelVarianceParams(7, 7, &aom_highbd_10_sub_pixel_variance128x128_c, 10), SubpelVarianceParams(7, 6, &aom_highbd_10_sub_pixel_variance128x64_c, 10), SubpelVarianceParams(6, 7, &aom_highbd_10_sub_pixel_variance64x128_c, 10), SubpelVarianceParams(6, 6, &aom_highbd_10_sub_pixel_variance64x64_c, 10), SubpelVarianceParams(6, 5, &aom_highbd_10_sub_pixel_variance64x32_c, 10), SubpelVarianceParams(5, 6, &aom_highbd_10_sub_pixel_variance32x64_c, 10), SubpelVarianceParams(5, 5, &aom_highbd_10_sub_pixel_variance32x32_c, 10), SubpelVarianceParams(5, 4, &aom_highbd_10_sub_pixel_variance32x16_c, 10), SubpelVarianceParams(4, 5, &aom_highbd_10_sub_pixel_variance16x32_c, 10), SubpelVarianceParams(4, 4, &aom_highbd_10_sub_pixel_variance16x16_c, 10), SubpelVarianceParams(4, 3, &aom_highbd_10_sub_pixel_variance16x8_c, 10), SubpelVarianceParams(3, 4, &aom_highbd_10_sub_pixel_variance8x16_c, 10), SubpelVarianceParams(3, 3, &aom_highbd_10_sub_pixel_variance8x8_c, 10), SubpelVarianceParams(3, 2, 
&aom_highbd_10_sub_pixel_variance8x4_c, 10), SubpelVarianceParams(2, 3, &aom_highbd_10_sub_pixel_variance4x8_c, 10), SubpelVarianceParams(2, 2, &aom_highbd_10_sub_pixel_variance4x4_c, 10), /* 12-bit entries */ SubpelVarianceParams(7, 7, &aom_highbd_12_sub_pixel_variance128x128_c, 12), SubpelVarianceParams(7, 6, &aom_highbd_12_sub_pixel_variance128x64_c, 12), SubpelVarianceParams(6, 7, &aom_highbd_12_sub_pixel_variance64x128_c, 12), SubpelVarianceParams(6, 6, &aom_highbd_12_sub_pixel_variance64x64_c, 12), SubpelVarianceParams(6, 5, &aom_highbd_12_sub_pixel_variance64x32_c, 12), SubpelVarianceParams(5, 6, &aom_highbd_12_sub_pixel_variance32x64_c, 12), SubpelVarianceParams(5, 5, &aom_highbd_12_sub_pixel_variance32x32_c, 12), SubpelVarianceParams(5, 4, &aom_highbd_12_sub_pixel_variance32x16_c, 12), SubpelVarianceParams(4, 5, &aom_highbd_12_sub_pixel_variance16x32_c, 12), SubpelVarianceParams(4, 4, &aom_highbd_12_sub_pixel_variance16x16_c, 12), SubpelVarianceParams(4, 3, &aom_highbd_12_sub_pixel_variance16x8_c, 12), SubpelVarianceParams(3, 4, &aom_highbd_12_sub_pixel_variance8x16_c, 12), SubpelVarianceParams(3, 3, &aom_highbd_12_sub_pixel_variance8x8_c, 12), SubpelVarianceParams(3, 2, &aom_highbd_12_sub_pixel_variance8x4_c, 12), SubpelVarianceParams(2, 3, &aom_highbd_12_sub_pixel_variance4x8_c, 12), SubpelVarianceParams(2, 2, &aom_highbd_12_sub_pixel_variance4x4_c, 12), #if !CONFIG_REALTIME_ONLY /* 4:1 and 1:4 block sizes for all three bit depths; left out of realtime-only builds. */ SubpelVarianceParams(6, 4, &aom_highbd_8_sub_pixel_variance64x16_c, 8), SubpelVarianceParams(4, 6, &aom_highbd_8_sub_pixel_variance16x64_c, 8), SubpelVarianceParams(5, 3, &aom_highbd_8_sub_pixel_variance32x8_c, 8), SubpelVarianceParams(3, 5, &aom_highbd_8_sub_pixel_variance8x32_c, 8), SubpelVarianceParams(4, 2, &aom_highbd_8_sub_pixel_variance16x4_c, 8), SubpelVarianceParams(2, 4, &aom_highbd_8_sub_pixel_variance4x16_c, 8), SubpelVarianceParams(6, 4, &aom_highbd_10_sub_pixel_variance64x16_c, 10), SubpelVarianceParams(4, 6, &aom_highbd_10_sub_pixel_variance16x64_c, 10), SubpelVarianceParams(5, 3, 
&aom_highbd_10_sub_pixel_variance32x8_c, 10), SubpelVarianceParams(3, 5, &aom_highbd_10_sub_pixel_variance8x32_c, 10), SubpelVarianceParams(4, 2, &aom_highbd_10_sub_pixel_variance16x4_c, 10), SubpelVarianceParams(2, 4, &aom_highbd_10_sub_pixel_variance4x16_c, 10), SubpelVarianceParams(6, 4, &aom_highbd_12_sub_pixel_variance64x16_c, 12), SubpelVarianceParams(4, 6, &aom_highbd_12_sub_pixel_variance16x64_c, 12), SubpelVarianceParams(5, 3, &aom_highbd_12_sub_pixel_variance32x8_c, 12), SubpelVarianceParams(3, 5, &aom_highbd_12_sub_pixel_variance8x32_c, 12), SubpelVarianceParams(4, 2, &aom_highbd_12_sub_pixel_variance16x4_c, 12), SubpelVarianceParams(2, 4, &aom_highbd_12_sub_pixel_variance4x16_c, 12), #endif }; /* Registers every entry of kArrayHBDSubpelVariance_c with AvxHBDSubpelVarianceTest under the "C" prefix. */ INSTANTIATE_TEST_SUITE_P(C, AvxHBDSubpelVarianceTest, ::testing::ValuesIn(kArrayHBDSubpelVariance_c)); /* High-bit-depth sub-pixel average variance, C functions, listed as 8-bit, then 10-bit, then 12-bit. */ const SubpelAvgVarianceParams kArrayHBDSubpelAvgVariance_c[] = { SubpelAvgVarianceParams(7, 7, &aom_highbd_8_sub_pixel_avg_variance128x128_c, 8), SubpelAvgVarianceParams(7, 6, &aom_highbd_8_sub_pixel_avg_variance128x64_c, 8), SubpelAvgVarianceParams(6, 7, &aom_highbd_8_sub_pixel_avg_variance64x128_c, 8), SubpelAvgVarianceParams(6, 6, &aom_highbd_8_sub_pixel_avg_variance64x64_c, 8), SubpelAvgVarianceParams(6, 5, &aom_highbd_8_sub_pixel_avg_variance64x32_c, 8), SubpelAvgVarianceParams(5, 6, &aom_highbd_8_sub_pixel_avg_variance32x64_c, 8), SubpelAvgVarianceParams(5, 5, &aom_highbd_8_sub_pixel_avg_variance32x32_c, 8), SubpelAvgVarianceParams(5, 4, &aom_highbd_8_sub_pixel_avg_variance32x16_c, 8), SubpelAvgVarianceParams(4, 5, &aom_highbd_8_sub_pixel_avg_variance16x32_c, 8), SubpelAvgVarianceParams(4, 4, &aom_highbd_8_sub_pixel_avg_variance16x16_c, 8), SubpelAvgVarianceParams(4, 3, &aom_highbd_8_sub_pixel_avg_variance16x8_c, 8), SubpelAvgVarianceParams(3, 4, &aom_highbd_8_sub_pixel_avg_variance8x16_c, 8), SubpelAvgVarianceParams(3, 3, &aom_highbd_8_sub_pixel_avg_variance8x8_c, 8), SubpelAvgVarianceParams(3, 2, &aom_highbd_8_sub_pixel_avg_variance8x4_c, 8), 
SubpelAvgVarianceParams(2, 3, &aom_highbd_8_sub_pixel_avg_variance4x8_c, 8), SubpelAvgVarianceParams(2, 2, &aom_highbd_8_sub_pixel_avg_variance4x4_c, 8), /* 10-bit entries */ SubpelAvgVarianceParams(7, 7, &aom_highbd_10_sub_pixel_avg_variance128x128_c, 10), SubpelAvgVarianceParams(7, 6, &aom_highbd_10_sub_pixel_avg_variance128x64_c, 10), SubpelAvgVarianceParams(6, 7, &aom_highbd_10_sub_pixel_avg_variance64x128_c, 10), SubpelAvgVarianceParams(6, 6, &aom_highbd_10_sub_pixel_avg_variance64x64_c, 10), SubpelAvgVarianceParams(6, 5, &aom_highbd_10_sub_pixel_avg_variance64x32_c, 10), SubpelAvgVarianceParams(5, 6, &aom_highbd_10_sub_pixel_avg_variance32x64_c, 10), SubpelAvgVarianceParams(5, 5, &aom_highbd_10_sub_pixel_avg_variance32x32_c, 10), SubpelAvgVarianceParams(5, 4, &aom_highbd_10_sub_pixel_avg_variance32x16_c, 10), SubpelAvgVarianceParams(4, 5, &aom_highbd_10_sub_pixel_avg_variance16x32_c, 10), SubpelAvgVarianceParams(4, 4, &aom_highbd_10_sub_pixel_avg_variance16x16_c, 10), SubpelAvgVarianceParams(4, 3, &aom_highbd_10_sub_pixel_avg_variance16x8_c, 10), SubpelAvgVarianceParams(3, 4, &aom_highbd_10_sub_pixel_avg_variance8x16_c, 10), SubpelAvgVarianceParams(3, 3, &aom_highbd_10_sub_pixel_avg_variance8x8_c, 10), SubpelAvgVarianceParams(3, 2, &aom_highbd_10_sub_pixel_avg_variance8x4_c, 10), SubpelAvgVarianceParams(2, 3, &aom_highbd_10_sub_pixel_avg_variance4x8_c, 10), SubpelAvgVarianceParams(2, 2, &aom_highbd_10_sub_pixel_avg_variance4x4_c, 10), /* 12-bit entries */ SubpelAvgVarianceParams(7, 7, &aom_highbd_12_sub_pixel_avg_variance128x128_c, 12), SubpelAvgVarianceParams(7, 6, &aom_highbd_12_sub_pixel_avg_variance128x64_c, 12), SubpelAvgVarianceParams(6, 7, &aom_highbd_12_sub_pixel_avg_variance64x128_c, 12), SubpelAvgVarianceParams(6, 6, &aom_highbd_12_sub_pixel_avg_variance64x64_c, 12), SubpelAvgVarianceParams(6, 5, &aom_highbd_12_sub_pixel_avg_variance64x32_c, 12), SubpelAvgVarianceParams(5, 6, &aom_highbd_12_sub_pixel_avg_variance32x64_c, 12), SubpelAvgVarianceParams(5, 5, 
&aom_highbd_12_sub_pixel_avg_variance32x32_c, 12), SubpelAvgVarianceParams(5, 4, &aom_highbd_12_sub_pixel_avg_variance32x16_c, 12), SubpelAvgVarianceParams(4, 5, &aom_highbd_12_sub_pixel_avg_variance16x32_c, 12), SubpelAvgVarianceParams(4, 4, &aom_highbd_12_sub_pixel_avg_variance16x16_c, 12), SubpelAvgVarianceParams(4, 3, &aom_highbd_12_sub_pixel_avg_variance16x8_c, 12), SubpelAvgVarianceParams(3, 4, &aom_highbd_12_sub_pixel_avg_variance8x16_c, 12), SubpelAvgVarianceParams(3, 3, &aom_highbd_12_sub_pixel_avg_variance8x8_c, 12), SubpelAvgVarianceParams(3, 2, &aom_highbd_12_sub_pixel_avg_variance8x4_c, 12), SubpelAvgVarianceParams(2, 3, &aom_highbd_12_sub_pixel_avg_variance4x8_c, 12), SubpelAvgVarianceParams(2, 2, &aom_highbd_12_sub_pixel_avg_variance4x4_c, 12), #if !CONFIG_REALTIME_ONLY /* 4:1 and 1:4 block sizes for all three bit depths; left out of realtime-only builds. */ SubpelAvgVarianceParams(6, 4, &aom_highbd_8_sub_pixel_avg_variance64x16_c, 8), SubpelAvgVarianceParams(4, 6, &aom_highbd_8_sub_pixel_avg_variance16x64_c, 8), SubpelAvgVarianceParams(5, 3, &aom_highbd_8_sub_pixel_avg_variance32x8_c, 8), SubpelAvgVarianceParams(3, 5, &aom_highbd_8_sub_pixel_avg_variance8x32_c, 8), SubpelAvgVarianceParams(4, 2, &aom_highbd_8_sub_pixel_avg_variance16x4_c, 8), SubpelAvgVarianceParams(2, 4, &aom_highbd_8_sub_pixel_avg_variance4x16_c, 8), SubpelAvgVarianceParams(6, 4, &aom_highbd_10_sub_pixel_avg_variance64x16_c, 10), SubpelAvgVarianceParams(4, 6, &aom_highbd_10_sub_pixel_avg_variance16x64_c, 10), SubpelAvgVarianceParams(5, 3, &aom_highbd_10_sub_pixel_avg_variance32x8_c, 10), SubpelAvgVarianceParams(3, 5, &aom_highbd_10_sub_pixel_avg_variance8x32_c, 10), SubpelAvgVarianceParams(4, 2, &aom_highbd_10_sub_pixel_avg_variance16x4_c, 10), SubpelAvgVarianceParams(2, 4, &aom_highbd_10_sub_pixel_avg_variance4x16_c, 10), SubpelAvgVarianceParams(6, 4, &aom_highbd_12_sub_pixel_avg_variance64x16_c, 12), SubpelAvgVarianceParams(4, 6, &aom_highbd_12_sub_pixel_avg_variance16x64_c, 12), SubpelAvgVarianceParams(5, 3, &aom_highbd_12_sub_pixel_avg_variance32x8_c, 12), 
SubpelAvgVarianceParams(3, 5, &aom_highbd_12_sub_pixel_avg_variance8x32_c, 12), SubpelAvgVarianceParams(4, 2, &aom_highbd_12_sub_pixel_avg_variance16x4_c, 12), SubpelAvgVarianceParams(2, 4, &aom_highbd_12_sub_pixel_avg_variance4x16_c, 12), #endif }; /* Registers every entry of kArrayHBDSubpelAvgVariance_c with AvxHBDSubpelAvgVarianceTest under the "C" prefix. */ INSTANTIATE_TEST_SUITE_P(C, AvxHBDSubpelAvgVarianceTest, ::testing::ValuesIn(kArrayHBDSubpelAvgVariance_c)); /* High-bit-depth distance-weighted sub-pixel average variance, C functions, listed as 8-bit, then 10-bit, then 12-bit. */ const DistWtdSubpelAvgVarianceParams kArrayHBDDistWtdSubpelAvgVariance_c[] = { DistWtdSubpelAvgVarianceParams( 7, 7, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance128x128_c, 8), DistWtdSubpelAvgVarianceParams( 7, 6, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance128x64_c, 8), DistWtdSubpelAvgVarianceParams( 6, 7, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance64x128_c, 8), DistWtdSubpelAvgVarianceParams( 6, 6, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance64x64_c, 8), DistWtdSubpelAvgVarianceParams( 6, 5, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance64x32_c, 8), DistWtdSubpelAvgVarianceParams( 5, 6, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance32x64_c, 8), DistWtdSubpelAvgVarianceParams( 5, 5, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance32x32_c, 8), DistWtdSubpelAvgVarianceParams( 5, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance32x16_c, 8), DistWtdSubpelAvgVarianceParams( 4, 5, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x32_c, 8), DistWtdSubpelAvgVarianceParams( 4, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x16_c, 8), DistWtdSubpelAvgVarianceParams( 4, 3, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x8_c, 8), DistWtdSubpelAvgVarianceParams( 3, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance8x16_c, 8), DistWtdSubpelAvgVarianceParams( 3, 3, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance8x8_c, 8), DistWtdSubpelAvgVarianceParams( 3, 2, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance8x4_c, 8), DistWtdSubpelAvgVarianceParams( 2, 3, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance4x8_c, 8), DistWtdSubpelAvgVarianceParams( 2, 2, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance4x4_c, 8), DistWtdSubpelAvgVarianceParams( 
7, 7, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance128x128_c, 10), DistWtdSubpelAvgVarianceParams( 7, 6, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance128x64_c, 10), DistWtdSubpelAvgVarianceParams( 6, 7, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance64x128_c, 10), DistWtdSubpelAvgVarianceParams( 6, 6, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance64x64_c, 10), DistWtdSubpelAvgVarianceParams( 6, 5, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance64x32_c, 10), DistWtdSubpelAvgVarianceParams( 5, 6, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance32x64_c, 10), DistWtdSubpelAvgVarianceParams( 5, 5, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance32x32_c, 10), DistWtdSubpelAvgVarianceParams( 5, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance32x16_c, 10), DistWtdSubpelAvgVarianceParams( 4, 5, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x32_c, 10), DistWtdSubpelAvgVarianceParams( 4, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x16_c, 10), DistWtdSubpelAvgVarianceParams( 4, 3, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x8_c, 10), DistWtdSubpelAvgVarianceParams( 3, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance8x16_c, 10), DistWtdSubpelAvgVarianceParams( 3, 3, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance8x8_c, 10), DistWtdSubpelAvgVarianceParams( 3, 2, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance8x4_c, 10), DistWtdSubpelAvgVarianceParams( 2, 3, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance4x8_c, 10), DistWtdSubpelAvgVarianceParams( 2, 2, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance4x4_c, 10), /* 12-bit entries */ DistWtdSubpelAvgVarianceParams( 7, 7, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance128x128_c, 12), DistWtdSubpelAvgVarianceParams( 7, 6, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance128x64_c, 12), DistWtdSubpelAvgVarianceParams( 6, 7, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance64x128_c, 12), DistWtdSubpelAvgVarianceParams( 6, 6, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance64x64_c, 12), DistWtdSubpelAvgVarianceParams( 6, 5, 
&aom_highbd_12_dist_wtd_sub_pixel_avg_variance64x32_c, 12), DistWtdSubpelAvgVarianceParams( 5, 6, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance32x64_c, 12), DistWtdSubpelAvgVarianceParams( 5, 5, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance32x32_c, 12), DistWtdSubpelAvgVarianceParams( 5, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance32x16_c, 12), DistWtdSubpelAvgVarianceParams( 4, 5, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x32_c, 12), DistWtdSubpelAvgVarianceParams( 4, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x16_c, 12), DistWtdSubpelAvgVarianceParams( 4, 3, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x8_c, 12), DistWtdSubpelAvgVarianceParams( 3, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance8x16_c, 12), DistWtdSubpelAvgVarianceParams( 3, 3, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance8x8_c, 12), DistWtdSubpelAvgVarianceParams( 3, 2, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance8x4_c, 12), DistWtdSubpelAvgVarianceParams( 2, 3, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance4x8_c, 12), DistWtdSubpelAvgVarianceParams( 2, 2, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance4x4_c, 12), #if !CONFIG_REALTIME_ONLY /* 4:1 and 1:4 block sizes for all three bit depths; left out of realtime-only builds. */ DistWtdSubpelAvgVarianceParams( 6, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance64x16_c, 8), DistWtdSubpelAvgVarianceParams( 4, 6, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x64_c, 8), DistWtdSubpelAvgVarianceParams( 5, 3, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance32x8_c, 8), DistWtdSubpelAvgVarianceParams( 3, 5, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance8x32_c, 8), DistWtdSubpelAvgVarianceParams( 4, 2, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x4_c, 8), DistWtdSubpelAvgVarianceParams( 2, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance4x16_c, 8), DistWtdSubpelAvgVarianceParams( 6, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance64x16_c, 10), DistWtdSubpelAvgVarianceParams( 4, 6, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x64_c, 10), DistWtdSubpelAvgVarianceParams( 5, 3, 
&aom_highbd_10_dist_wtd_sub_pixel_avg_variance32x8_c, 10), DistWtdSubpelAvgVarianceParams( 3, 5, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance8x32_c, 10), DistWtdSubpelAvgVarianceParams( 4, 2, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x4_c, 10), DistWtdSubpelAvgVarianceParams( 2, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance4x16_c, 10), DistWtdSubpelAvgVarianceParams( 6, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance64x16_c, 12), DistWtdSubpelAvgVarianceParams( 4, 6, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x64_c, 12), DistWtdSubpelAvgVarianceParams( 5, 3, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance32x8_c, 12), DistWtdSubpelAvgVarianceParams( 3, 5, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance8x32_c, 12), DistWtdSubpelAvgVarianceParams( 4, 2, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x4_c, 12), DistWtdSubpelAvgVarianceParams( 2, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance4x16_c, 12), #endif }; /* Registers every entry of kArrayHBDDistWtdSubpelAvgVariance_c with AvxHBDDistWtdSubpelAvgVarianceTest under the "C" prefix. */ INSTANTIATE_TEST_SUITE_P( C, AvxHBDDistWtdSubpelAvgVarianceTest, ::testing::ValuesIn(kArrayHBDDistWtdSubpelAvgVariance_c)); #if !CONFIG_REALTIME_ONLY /* High-bit-depth OBMC sub-pixel variance, C functions, listed as 8-bit, then 10-bit, then 12-bit; the whole table is compiled only when CONFIG_REALTIME_ONLY is 0. */ const ObmcSubpelVarianceParams kArrayHBDObmcSubpelVariance_c[] = { ObmcSubpelVarianceParams(7, 7, &aom_highbd_8_obmc_sub_pixel_variance128x128_c, 8), ObmcSubpelVarianceParams(7, 6, &aom_highbd_8_obmc_sub_pixel_variance128x64_c, 8), ObmcSubpelVarianceParams(6, 7, &aom_highbd_8_obmc_sub_pixel_variance64x128_c, 8), ObmcSubpelVarianceParams(6, 6, &aom_highbd_8_obmc_sub_pixel_variance64x64_c, 8), ObmcSubpelVarianceParams(6, 5, &aom_highbd_8_obmc_sub_pixel_variance64x32_c, 8), ObmcSubpelVarianceParams(5, 6, &aom_highbd_8_obmc_sub_pixel_variance32x64_c, 8), ObmcSubpelVarianceParams(5, 5, &aom_highbd_8_obmc_sub_pixel_variance32x32_c, 8), ObmcSubpelVarianceParams(5, 4, &aom_highbd_8_obmc_sub_pixel_variance32x16_c, 8), ObmcSubpelVarianceParams(4, 5, &aom_highbd_8_obmc_sub_pixel_variance16x32_c, 8), ObmcSubpelVarianceParams(4, 4, &aom_highbd_8_obmc_sub_pixel_variance16x16_c, 8), 
ObmcSubpelVarianceParams(4, 3, &aom_highbd_8_obmc_sub_pixel_variance16x8_c, 8), ObmcSubpelVarianceParams(3, 4, &aom_highbd_8_obmc_sub_pixel_variance8x16_c, 8), ObmcSubpelVarianceParams(3, 3, &aom_highbd_8_obmc_sub_pixel_variance8x8_c, 8), ObmcSubpelVarianceParams(3, 2, &aom_highbd_8_obmc_sub_pixel_variance8x4_c, 8), ObmcSubpelVarianceParams(2, 3, &aom_highbd_8_obmc_sub_pixel_variance4x8_c, 8), ObmcSubpelVarianceParams(2, 2, &aom_highbd_8_obmc_sub_pixel_variance4x4_c, 8), /* 10-bit entries */ ObmcSubpelVarianceParams(7, 7, &aom_highbd_10_obmc_sub_pixel_variance128x128_c, 10), ObmcSubpelVarianceParams(7, 6, &aom_highbd_10_obmc_sub_pixel_variance128x64_c, 10), ObmcSubpelVarianceParams(6, 7, &aom_highbd_10_obmc_sub_pixel_variance64x128_c, 10), ObmcSubpelVarianceParams(6, 6, &aom_highbd_10_obmc_sub_pixel_variance64x64_c, 10), ObmcSubpelVarianceParams(6, 5, &aom_highbd_10_obmc_sub_pixel_variance64x32_c, 10), ObmcSubpelVarianceParams(5, 6, &aom_highbd_10_obmc_sub_pixel_variance32x64_c, 10), ObmcSubpelVarianceParams(5, 5, &aom_highbd_10_obmc_sub_pixel_variance32x32_c, 10), ObmcSubpelVarianceParams(5, 4, &aom_highbd_10_obmc_sub_pixel_variance32x16_c, 10), ObmcSubpelVarianceParams(4, 5, &aom_highbd_10_obmc_sub_pixel_variance16x32_c, 10), ObmcSubpelVarianceParams(4, 4, &aom_highbd_10_obmc_sub_pixel_variance16x16_c, 10), ObmcSubpelVarianceParams(4, 3, &aom_highbd_10_obmc_sub_pixel_variance16x8_c, 10), ObmcSubpelVarianceParams(3, 4, &aom_highbd_10_obmc_sub_pixel_variance8x16_c, 10), ObmcSubpelVarianceParams(3, 3, &aom_highbd_10_obmc_sub_pixel_variance8x8_c, 10), ObmcSubpelVarianceParams(3, 2, &aom_highbd_10_obmc_sub_pixel_variance8x4_c, 10), ObmcSubpelVarianceParams(2, 3, &aom_highbd_10_obmc_sub_pixel_variance4x8_c, 10), ObmcSubpelVarianceParams(2, 2, &aom_highbd_10_obmc_sub_pixel_variance4x4_c, 10), /* 12-bit entries */ ObmcSubpelVarianceParams(7, 7, &aom_highbd_12_obmc_sub_pixel_variance128x128_c, 12), ObmcSubpelVarianceParams(7, 6, &aom_highbd_12_obmc_sub_pixel_variance128x64_c, 12), ObmcSubpelVarianceParams(6, 7, 
&aom_highbd_12_obmc_sub_pixel_variance64x128_c, 12), ObmcSubpelVarianceParams(6, 6, &aom_highbd_12_obmc_sub_pixel_variance64x64_c, 12), ObmcSubpelVarianceParams(6, 5, &aom_highbd_12_obmc_sub_pixel_variance64x32_c, 12), ObmcSubpelVarianceParams(5, 6, &aom_highbd_12_obmc_sub_pixel_variance32x64_c, 12), ObmcSubpelVarianceParams(5, 5, &aom_highbd_12_obmc_sub_pixel_variance32x32_c, 12), ObmcSubpelVarianceParams(5, 4, &aom_highbd_12_obmc_sub_pixel_variance32x16_c, 12), ObmcSubpelVarianceParams(4, 5, &aom_highbd_12_obmc_sub_pixel_variance16x32_c, 12), ObmcSubpelVarianceParams(4, 4, &aom_highbd_12_obmc_sub_pixel_variance16x16_c, 12), ObmcSubpelVarianceParams(4, 3, &aom_highbd_12_obmc_sub_pixel_variance16x8_c, 12), ObmcSubpelVarianceParams(3, 4, &aom_highbd_12_obmc_sub_pixel_variance8x16_c, 12), ObmcSubpelVarianceParams(3, 3, &aom_highbd_12_obmc_sub_pixel_variance8x8_c, 12), ObmcSubpelVarianceParams(3, 2, &aom_highbd_12_obmc_sub_pixel_variance8x4_c, 12), ObmcSubpelVarianceParams(2, 3, &aom_highbd_12_obmc_sub_pixel_variance4x8_c, 12), ObmcSubpelVarianceParams(2, 2, &aom_highbd_12_obmc_sub_pixel_variance4x4_c, 12), /* 4:1 and 1:4 block sizes for all three bit depths. */ ObmcSubpelVarianceParams(6, 4, &aom_highbd_8_obmc_sub_pixel_variance64x16_c, 8), ObmcSubpelVarianceParams(4, 6, &aom_highbd_8_obmc_sub_pixel_variance16x64_c, 8), ObmcSubpelVarianceParams(5, 3, &aom_highbd_8_obmc_sub_pixel_variance32x8_c, 8), ObmcSubpelVarianceParams(3, 5, &aom_highbd_8_obmc_sub_pixel_variance8x32_c, 8), ObmcSubpelVarianceParams(4, 2, &aom_highbd_8_obmc_sub_pixel_variance16x4_c, 8), ObmcSubpelVarianceParams(2, 4, &aom_highbd_8_obmc_sub_pixel_variance4x16_c, 8), ObmcSubpelVarianceParams(6, 4, &aom_highbd_10_obmc_sub_pixel_variance64x16_c, 10), ObmcSubpelVarianceParams(4, 6, &aom_highbd_10_obmc_sub_pixel_variance16x64_c, 10), ObmcSubpelVarianceParams(5, 3, &aom_highbd_10_obmc_sub_pixel_variance32x8_c, 10), ObmcSubpelVarianceParams(3, 5, &aom_highbd_10_obmc_sub_pixel_variance8x32_c, 10), ObmcSubpelVarianceParams(4, 2, 
&aom_highbd_10_obmc_sub_pixel_variance16x4_c, 10), ObmcSubpelVarianceParams(2, 4, &aom_highbd_10_obmc_sub_pixel_variance4x16_c, 10), ObmcSubpelVarianceParams(6, 4, &aom_highbd_12_obmc_sub_pixel_variance64x16_c, 12), ObmcSubpelVarianceParams(4, 6, &aom_highbd_12_obmc_sub_pixel_variance16x64_c, 12), ObmcSubpelVarianceParams(5, 3, &aom_highbd_12_obmc_sub_pixel_variance32x8_c, 12), ObmcSubpelVarianceParams(3, 5, &aom_highbd_12_obmc_sub_pixel_variance8x32_c, 12), ObmcSubpelVarianceParams(4, 2, &aom_highbd_12_obmc_sub_pixel_variance16x4_c, 12), ObmcSubpelVarianceParams(2, 4, &aom_highbd_12_obmc_sub_pixel_variance4x16_c, 12), }; /* Registers every entry of kArrayHBDObmcSubpelVariance_c with AvxHBDObmcSubpelVarianceTest under the "C" prefix. */ INSTANTIATE_TEST_SUITE_P(C, AvxHBDObmcSubpelVarianceTest, ::testing::ValuesIn(kArrayHBDObmcSubpelVariance_c)); #endif // !CONFIG_REALTIME_ONLY #endif // CONFIG_AV1_HIGHBITDEPTH #if HAVE_SSE2 /* SSE2 kernels for the non-highbd suites; each generic WxH / 16xH kernel is registered with four size pairs. */ INSTANTIATE_TEST_SUITE_P( SSE2, MseWxHTest, ::testing::Values(MseWxHParams(3, 3, &aom_mse_wxh_16bit_sse2, 8), MseWxHParams(3, 2, &aom_mse_wxh_16bit_sse2, 8), MseWxHParams(2, 3, &aom_mse_wxh_16bit_sse2, 8), MseWxHParams(2, 2, &aom_mse_wxh_16bit_sse2, 8))); INSTANTIATE_TEST_SUITE_P( SSE2, Mse16xHTest, ::testing::Values(Mse16xHParams(3, 3, &aom_mse_16xh_16bit_sse2, 8), Mse16xHParams(3, 2, &aom_mse_16xh_16bit_sse2, 8), Mse16xHParams(2, 3, &aom_mse_16xh_16bit_sse2, 8), Mse16xHParams(2, 2, &aom_mse_16xh_16bit_sse2, 8))); INSTANTIATE_TEST_SUITE_P(SSE2, SumOfSquaresTest, ::testing::Values(aom_get_mb_ss_sse2)); INSTANTIATE_TEST_SUITE_P(SSE2, AvxMseTest, ::testing::Values(MseParams(4, 4, &aom_mse16x16_sse2), MseParams(4, 3, &aom_mse16x8_sse2), MseParams(3, 4, &aom_mse8x16_sse2), MseParams(3, 3, &aom_mse8x8_sse2))); /* SSE2 variance kernels; the two leading arguments equal log2 of the width/height in the function name. */ const VarianceParams kArrayVariance_sse2[] = { VarianceParams(7, 7, &aom_variance128x128_sse2), VarianceParams(7, 6, &aom_variance128x64_sse2), VarianceParams(6, 7, &aom_variance64x128_sse2), VarianceParams(6, 6, &aom_variance64x64_sse2), VarianceParams(6, 5, &aom_variance64x32_sse2), VarianceParams(5, 6, &aom_variance32x64_sse2), VarianceParams(5, 5, 
&aom_variance32x32_sse2), VarianceParams(5, 4, &aom_variance32x16_sse2), VarianceParams(4, 5, &aom_variance16x32_sse2), VarianceParams(4, 4, &aom_variance16x16_sse2), VarianceParams(4, 3, &aom_variance16x8_sse2), VarianceParams(3, 4, &aom_variance8x16_sse2), VarianceParams(3, 3, &aom_variance8x8_sse2), VarianceParams(3, 2, &aom_variance8x4_sse2), VarianceParams(2, 3, &aom_variance4x8_sse2), VarianceParams(2, 2, &aom_variance4x4_sse2), #if !CONFIG_REALTIME_ONLY VarianceParams(6, 4, &aom_variance64x16_sse2), VarianceParams(5, 3, &aom_variance32x8_sse2), VarianceParams(4, 6, &aom_variance16x64_sse2), VarianceParams(4, 2, &aom_variance16x4_sse2), VarianceParams(3, 5, &aom_variance8x32_sse2), VarianceParams(2, 4, &aom_variance4x16_sse2), #endif }; INSTANTIATE_TEST_SUITE_P(SSE2, AvxVarianceTest, ::testing::ValuesIn(kArrayVariance_sse2)); const GetSseSumParams kArrayGetSseSum8x8Quad_sse2[] = { GetSseSumParams(7, 7, &aom_get_var_sse_sum_8x8_quad_sse2, 0), GetSseSumParams(6, 6, &aom_get_var_sse_sum_8x8_quad_sse2, 0), GetSseSumParams(5, 5, &aom_get_var_sse_sum_8x8_quad_sse2, 0), GetSseSumParams(5, 4, &aom_get_var_sse_sum_8x8_quad_sse2, 0) }; INSTANTIATE_TEST_SUITE_P(SSE2, GetSseSum8x8QuadTest, ::testing::ValuesIn(kArrayGetSseSum8x8Quad_sse2)); const GetSseSumParamsDual kArrayGetSseSum16x16Dual_sse2[] = { GetSseSumParamsDual(7, 7, &aom_get_var_sse_sum_16x16_dual_sse2, 0), GetSseSumParamsDual(6, 6, &aom_get_var_sse_sum_16x16_dual_sse2, 0), GetSseSumParamsDual(5, 5, &aom_get_var_sse_sum_16x16_dual_sse2, 0), GetSseSumParamsDual(5, 4, &aom_get_var_sse_sum_16x16_dual_sse2, 0) }; INSTANTIATE_TEST_SUITE_P(SSE2, GetSseSum16x16DualTest, ::testing::ValuesIn(kArrayGetSseSum16x16Dual_sse2)); #if CONFIG_AV1_HIGHBITDEPTH #if HAVE_SSE2 INSTANTIATE_TEST_SUITE_P( SSE2, MseHBDWxHTest, ::testing::Values(MseHBDWxHParams(3, 3, &aom_mse_wxh_16bit_highbd_sse2, 10), MseHBDWxHParams(3, 2, &aom_mse_wxh_16bit_highbd_sse2, 10), MseHBDWxHParams(2, 3, &aom_mse_wxh_16bit_highbd_sse2, 10), 
MseHBDWxHParams(2, 2, &aom_mse_wxh_16bit_highbd_sse2, 10))); INSTANTIATE_TEST_SUITE_P( SSE2, AvxHBDMseTest, ::testing::Values(MseParams(4, 4, &aom_highbd_12_mse16x16_sse2, 12), MseParams(3, 3, &aom_highbd_12_mse8x8_sse2, 12), MseParams(4, 4, &aom_highbd_10_mse16x16_sse2, 10), MseParams(3, 3, &aom_highbd_10_mse8x8_sse2, 10), MseParams(4, 4, &aom_highbd_8_mse16x16_sse2, 8), MseParams(3, 3, &aom_highbd_8_mse8x8_sse2, 8))); #endif // HAVE_SSE2 #if HAVE_SSE4_1 INSTANTIATE_TEST_SUITE_P( SSE4_1, AvxSubpelVarianceTest, ::testing::Values( SubpelVarianceParams(2, 2, &aom_highbd_8_sub_pixel_variance4x4_sse4_1, 8), SubpelVarianceParams(2, 2, &aom_highbd_10_sub_pixel_variance4x4_sse4_1, 10), SubpelVarianceParams(2, 2, &aom_highbd_12_sub_pixel_variance4x4_sse4_1, 12))); INSTANTIATE_TEST_SUITE_P( SSE4_1, AvxSubpelAvgVarianceTest, ::testing::Values( SubpelAvgVarianceParams(2, 2, &aom_highbd_8_sub_pixel_avg_variance4x4_sse4_1, 8), SubpelAvgVarianceParams(2, 2, &aom_highbd_10_sub_pixel_avg_variance4x4_sse4_1, 10), SubpelAvgVarianceParams(2, 2, &aom_highbd_12_sub_pixel_avg_variance4x4_sse4_1, 12))); #endif // HAVE_SSE4_1 #if HAVE_AVX2 INSTANTIATE_TEST_SUITE_P( AVX2, AvxHBDMseTest, ::testing::Values(MseParams(4, 4, &aom_highbd_10_mse16x16_avx2, 10))); #endif // HAVE_AVX2 const VarianceParams kArrayHBDVariance_sse2[] = { VarianceParams(7, 7, &aom_highbd_12_variance128x128_sse2, 12), VarianceParams(7, 6, &aom_highbd_12_variance128x64_sse2, 12), VarianceParams(6, 7, &aom_highbd_12_variance64x128_sse2, 12), VarianceParams(6, 6, &aom_highbd_12_variance64x64_sse2, 12), VarianceParams(6, 5, &aom_highbd_12_variance64x32_sse2, 12), VarianceParams(5, 6, &aom_highbd_12_variance32x64_sse2, 12), VarianceParams(5, 5, &aom_highbd_12_variance32x32_sse2, 12), VarianceParams(5, 4, &aom_highbd_12_variance32x16_sse2, 12), VarianceParams(4, 5, &aom_highbd_12_variance16x32_sse2, 12), VarianceParams(4, 4, &aom_highbd_12_variance16x16_sse2, 12), VarianceParams(4, 3, &aom_highbd_12_variance16x8_sse2, 12), 
VarianceParams(3, 4, &aom_highbd_12_variance8x16_sse2, 12), VarianceParams(3, 3, &aom_highbd_12_variance8x8_sse2, 12), VarianceParams(7, 7, &aom_highbd_10_variance128x128_sse2, 10), VarianceParams(7, 6, &aom_highbd_10_variance128x64_sse2, 10), VarianceParams(6, 7, &aom_highbd_10_variance64x128_sse2, 10), VarianceParams(6, 6, &aom_highbd_10_variance64x64_sse2, 10), VarianceParams(6, 5, &aom_highbd_10_variance64x32_sse2, 10), VarianceParams(5, 6, &aom_highbd_10_variance32x64_sse2, 10), VarianceParams(5, 5, &aom_highbd_10_variance32x32_sse2, 10), VarianceParams(5, 4, &aom_highbd_10_variance32x16_sse2, 10), VarianceParams(4, 5, &aom_highbd_10_variance16x32_sse2, 10), VarianceParams(4, 4, &aom_highbd_10_variance16x16_sse2, 10), VarianceParams(4, 3, &aom_highbd_10_variance16x8_sse2, 10), VarianceParams(3, 4, &aom_highbd_10_variance8x16_sse2, 10), VarianceParams(3, 3, &aom_highbd_10_variance8x8_sse2, 10), VarianceParams(7, 7, &aom_highbd_8_variance128x128_sse2, 8), VarianceParams(7, 6, &aom_highbd_8_variance128x64_sse2, 8), VarianceParams(6, 7, &aom_highbd_8_variance64x128_sse2, 8), VarianceParams(6, 6, &aom_highbd_8_variance64x64_sse2, 8), VarianceParams(6, 5, &aom_highbd_8_variance64x32_sse2, 8), VarianceParams(5, 6, &aom_highbd_8_variance32x64_sse2, 8), VarianceParams(5, 5, &aom_highbd_8_variance32x32_sse2, 8), VarianceParams(5, 4, &aom_highbd_8_variance32x16_sse2, 8), VarianceParams(4, 5, &aom_highbd_8_variance16x32_sse2, 8), VarianceParams(4, 4, &aom_highbd_8_variance16x16_sse2, 8), VarianceParams(4, 3, &aom_highbd_8_variance16x8_sse2, 8), VarianceParams(3, 4, &aom_highbd_8_variance8x16_sse2, 8), VarianceParams(3, 3, &aom_highbd_8_variance8x8_sse2, 8), #if !CONFIG_REALTIME_ONLY VarianceParams(6, 4, &aom_highbd_12_variance64x16_sse2, 12), VarianceParams(4, 6, &aom_highbd_12_variance16x64_sse2, 12), VarianceParams(5, 3, &aom_highbd_12_variance32x8_sse2, 12), VarianceParams(3, 5, &aom_highbd_12_variance8x32_sse2, 12), // VarianceParams(4, 2, 
&aom_highbd_12_variance16x4_sse2, 12), // VarianceParams(2, 4, &aom_highbd_12_variance4x16_sse2, 12), VarianceParams(6, 4, &aom_highbd_10_variance64x16_sse2, 10), VarianceParams(4, 6, &aom_highbd_10_variance16x64_sse2, 10), VarianceParams(5, 3, &aom_highbd_10_variance32x8_sse2, 10), VarianceParams(3, 5, &aom_highbd_10_variance8x32_sse2, 10), // VarianceParams(4, 2, &aom_highbd_10_variance16x4_sse2, 10), // VarianceParams(2, 4, &aom_highbd_10_variance4x16_sse2, 10), VarianceParams(6, 4, &aom_highbd_8_variance64x16_sse2, 8), VarianceParams(4, 6, &aom_highbd_8_variance16x64_sse2, 8), VarianceParams(5, 3, &aom_highbd_8_variance32x8_sse2, 8), VarianceParams(3, 5, &aom_highbd_8_variance8x32_sse2, 8), // VarianceParams(4, 2, &aom_highbd_8_variance16x4_sse2, 8), // VarianceParams(2, 4, &aom_highbd_8_variance4x16_sse2, 8), #endif }; INSTANTIATE_TEST_SUITE_P(SSE2, AvxHBDVarianceTest, ::testing::ValuesIn(kArrayHBDVariance_sse2)); #if HAVE_AVX2 INSTANTIATE_TEST_SUITE_P( AVX2, MseHBDWxHTest, ::testing::Values(MseHBDWxHParams(3, 3, &aom_mse_wxh_16bit_highbd_avx2, 10), MseHBDWxHParams(3, 2, &aom_mse_wxh_16bit_highbd_avx2, 10), MseHBDWxHParams(2, 3, &aom_mse_wxh_16bit_highbd_avx2, 10), MseHBDWxHParams(2, 2, &aom_mse_wxh_16bit_highbd_avx2, 10))); const VarianceParams kArrayHBDVariance_avx2[] = { VarianceParams(7, 7, &aom_highbd_10_variance128x128_avx2, 10), VarianceParams(7, 6, &aom_highbd_10_variance128x64_avx2, 10), VarianceParams(6, 7, &aom_highbd_10_variance64x128_avx2, 10), VarianceParams(6, 6, &aom_highbd_10_variance64x64_avx2, 10), VarianceParams(6, 5, &aom_highbd_10_variance64x32_avx2, 10), VarianceParams(5, 6, &aom_highbd_10_variance32x64_avx2, 10), VarianceParams(5, 5, &aom_highbd_10_variance32x32_avx2, 10), VarianceParams(5, 4, &aom_highbd_10_variance32x16_avx2, 10), VarianceParams(4, 5, &aom_highbd_10_variance16x32_avx2, 10), VarianceParams(4, 4, &aom_highbd_10_variance16x16_avx2, 10), VarianceParams(4, 3, &aom_highbd_10_variance16x8_avx2, 10), VarianceParams(3, 4, 
&aom_highbd_10_variance8x16_avx2, 10), VarianceParams(3, 3, &aom_highbd_10_variance8x8_avx2, 10), #if !CONFIG_REALTIME_ONLY VarianceParams(6, 4, &aom_highbd_10_variance64x16_avx2, 10), VarianceParams(5, 3, &aom_highbd_10_variance32x8_avx2, 10), VarianceParams(4, 6, &aom_highbd_10_variance16x64_avx2, 10), VarianceParams(3, 5, &aom_highbd_10_variance8x32_avx2, 10), #endif }; INSTANTIATE_TEST_SUITE_P(AVX2, AvxHBDVarianceTest, ::testing::ValuesIn(kArrayHBDVariance_avx2)); const SubpelVarianceParams kArrayHBDSubpelVariance_avx2[] = { SubpelVarianceParams(7, 7, &aom_highbd_10_sub_pixel_variance128x128_avx2, 10), SubpelVarianceParams(7, 6, &aom_highbd_10_sub_pixel_variance128x64_avx2, 10), SubpelVarianceParams(6, 7, &aom_highbd_10_sub_pixel_variance64x128_avx2, 10), SubpelVarianceParams(6, 6, &aom_highbd_10_sub_pixel_variance64x64_avx2, 10), SubpelVarianceParams(6, 5, &aom_highbd_10_sub_pixel_variance64x32_avx2, 10), SubpelVarianceParams(5, 6, &aom_highbd_10_sub_pixel_variance32x64_avx2, 10), SubpelVarianceParams(5, 5, &aom_highbd_10_sub_pixel_variance32x32_avx2, 10), SubpelVarianceParams(5, 4, &aom_highbd_10_sub_pixel_variance32x16_avx2, 10), SubpelVarianceParams(4, 5, &aom_highbd_10_sub_pixel_variance16x32_avx2, 10), SubpelVarianceParams(4, 4, &aom_highbd_10_sub_pixel_variance16x16_avx2, 10), SubpelVarianceParams(4, 3, &aom_highbd_10_sub_pixel_variance16x8_avx2, 10), SubpelVarianceParams(3, 4, &aom_highbd_10_sub_pixel_variance8x16_avx2, 10), SubpelVarianceParams(3, 3, &aom_highbd_10_sub_pixel_variance8x8_avx2, 10), }; INSTANTIATE_TEST_SUITE_P(AVX2, AvxHBDSubpelVarianceTest, ::testing::ValuesIn(kArrayHBDSubpelVariance_avx2)); #endif // HAVE_AVX2 const SubpelVarianceParams kArrayHBDSubpelVariance_sse2[] = { SubpelVarianceParams(7, 7, &aom_highbd_12_sub_pixel_variance128x128_sse2, 12), SubpelVarianceParams(7, 6, &aom_highbd_12_sub_pixel_variance128x64_sse2, 12), SubpelVarianceParams(6, 7, &aom_highbd_12_sub_pixel_variance64x128_sse2, 12), SubpelVarianceParams(6, 6, 
&aom_highbd_12_sub_pixel_variance64x64_sse2, 12), SubpelVarianceParams(6, 5, &aom_highbd_12_sub_pixel_variance64x32_sse2, 12), SubpelVarianceParams(5, 6, &aom_highbd_12_sub_pixel_variance32x64_sse2, 12), SubpelVarianceParams(5, 5, &aom_highbd_12_sub_pixel_variance32x32_sse2, 12), SubpelVarianceParams(5, 4, &aom_highbd_12_sub_pixel_variance32x16_sse2, 12), SubpelVarianceParams(4, 5, &aom_highbd_12_sub_pixel_variance16x32_sse2, 12), SubpelVarianceParams(4, 4, &aom_highbd_12_sub_pixel_variance16x16_sse2, 12), SubpelVarianceParams(4, 3, &aom_highbd_12_sub_pixel_variance16x8_sse2, 12), SubpelVarianceParams(3, 4, &aom_highbd_12_sub_pixel_variance8x16_sse2, 12), SubpelVarianceParams(3, 3, &aom_highbd_12_sub_pixel_variance8x8_sse2, 12), SubpelVarianceParams(3, 2, &aom_highbd_12_sub_pixel_variance8x4_sse2, 12), SubpelVarianceParams(7, 7, &aom_highbd_10_sub_pixel_variance128x128_sse2, 10), SubpelVarianceParams(7, 6, &aom_highbd_10_sub_pixel_variance128x64_sse2, 10), SubpelVarianceParams(6, 7, &aom_highbd_10_sub_pixel_variance64x128_sse2, 10), SubpelVarianceParams(6, 6, &aom_highbd_10_sub_pixel_variance64x64_sse2, 10), SubpelVarianceParams(6, 5, &aom_highbd_10_sub_pixel_variance64x32_sse2, 10), SubpelVarianceParams(5, 6, &aom_highbd_10_sub_pixel_variance32x64_sse2, 10), SubpelVarianceParams(5, 5, &aom_highbd_10_sub_pixel_variance32x32_sse2, 10), SubpelVarianceParams(5, 4, &aom_highbd_10_sub_pixel_variance32x16_sse2, 10), SubpelVarianceParams(4, 5, &aom_highbd_10_sub_pixel_variance16x32_sse2, 10), SubpelVarianceParams(4, 4, &aom_highbd_10_sub_pixel_variance16x16_sse2, 10), SubpelVarianceParams(4, 3, &aom_highbd_10_sub_pixel_variance16x8_sse2, 10), SubpelVarianceParams(3, 4, &aom_highbd_10_sub_pixel_variance8x16_sse2, 10), SubpelVarianceParams(3, 3, &aom_highbd_10_sub_pixel_variance8x8_sse2, 10), SubpelVarianceParams(3, 2, &aom_highbd_10_sub_pixel_variance8x4_sse2, 10), SubpelVarianceParams(7, 7, &aom_highbd_8_sub_pixel_variance128x128_sse2, 8), SubpelVarianceParams(7, 6, 
&aom_highbd_8_sub_pixel_variance128x64_sse2, 8), SubpelVarianceParams(6, 7, &aom_highbd_8_sub_pixel_variance64x128_sse2, 8), SubpelVarianceParams(6, 6, &aom_highbd_8_sub_pixel_variance64x64_sse2, 8), SubpelVarianceParams(6, 5, &aom_highbd_8_sub_pixel_variance64x32_sse2, 8), SubpelVarianceParams(5, 6, &aom_highbd_8_sub_pixel_variance32x64_sse2, 8), SubpelVarianceParams(5, 5, &aom_highbd_8_sub_pixel_variance32x32_sse2, 8), SubpelVarianceParams(5, 4, &aom_highbd_8_sub_pixel_variance32x16_sse2, 8), SubpelVarianceParams(4, 5, &aom_highbd_8_sub_pixel_variance16x32_sse2, 8), SubpelVarianceParams(4, 4, &aom_highbd_8_sub_pixel_variance16x16_sse2, 8), SubpelVarianceParams(4, 3, &aom_highbd_8_sub_pixel_variance16x8_sse2, 8), SubpelVarianceParams(3, 4, &aom_highbd_8_sub_pixel_variance8x16_sse2, 8), SubpelVarianceParams(3, 3, &aom_highbd_8_sub_pixel_variance8x8_sse2, 8), SubpelVarianceParams(3, 2, &aom_highbd_8_sub_pixel_variance8x4_sse2, 8), #if !CONFIG_REALTIME_ONLY SubpelVarianceParams(6, 4, &aom_highbd_12_sub_pixel_variance64x16_sse2, 12), SubpelVarianceParams(4, 6, &aom_highbd_12_sub_pixel_variance16x64_sse2, 12), SubpelVarianceParams(5, 3, &aom_highbd_12_sub_pixel_variance32x8_sse2, 12), SubpelVarianceParams(3, 5, &aom_highbd_12_sub_pixel_variance8x32_sse2, 12), SubpelVarianceParams(4, 2, &aom_highbd_12_sub_pixel_variance16x4_sse2, 12), // SubpelVarianceParams(2, 4, &aom_highbd_12_sub_pixel_variance4x16_sse2, 12), SubpelVarianceParams(6, 4, &aom_highbd_10_sub_pixel_variance64x16_sse2, 10), SubpelVarianceParams(4, 6, &aom_highbd_10_sub_pixel_variance16x64_sse2, 10), SubpelVarianceParams(5, 3, &aom_highbd_10_sub_pixel_variance32x8_sse2, 10), SubpelVarianceParams(3, 5, &aom_highbd_10_sub_pixel_variance8x32_sse2, 10), SubpelVarianceParams(4, 2, &aom_highbd_10_sub_pixel_variance16x4_sse2, 10), // SubpelVarianceParams(2, 4, &aom_highbd_10_sub_pixel_variance4x16_sse2, 10), SubpelVarianceParams(6, 4, &aom_highbd_8_sub_pixel_variance64x16_sse2, 8), SubpelVarianceParams(4, 6, 
&aom_highbd_8_sub_pixel_variance16x64_sse2, 8), SubpelVarianceParams(5, 3, &aom_highbd_8_sub_pixel_variance32x8_sse2, 8), SubpelVarianceParams(3, 5, &aom_highbd_8_sub_pixel_variance8x32_sse2, 8), SubpelVarianceParams(4, 2, &aom_highbd_8_sub_pixel_variance16x4_sse2, 8), // SubpelVarianceParams(2, 4, &aom_highbd_8_sub_pixel_variance4x16_sse2, 8), #endif }; INSTANTIATE_TEST_SUITE_P(SSE2, AvxHBDSubpelVarianceTest, ::testing::ValuesIn(kArrayHBDSubpelVariance_sse2)); const SubpelAvgVarianceParams kArrayHBDSubpelAvgVariance_sse2[] = { SubpelAvgVarianceParams(6, 6, &aom_highbd_12_sub_pixel_avg_variance64x64_sse2, 12), SubpelAvgVarianceParams(6, 5, &aom_highbd_12_sub_pixel_avg_variance64x32_sse2, 12), SubpelAvgVarianceParams(5, 6, &aom_highbd_12_sub_pixel_avg_variance32x64_sse2, 12), SubpelAvgVarianceParams(5, 5, &aom_highbd_12_sub_pixel_avg_variance32x32_sse2, 12), SubpelAvgVarianceParams(5, 4, &aom_highbd_12_sub_pixel_avg_variance32x16_sse2, 12), SubpelAvgVarianceParams(4, 5, &aom_highbd_12_sub_pixel_avg_variance16x32_sse2, 12), SubpelAvgVarianceParams(4, 4, &aom_highbd_12_sub_pixel_avg_variance16x16_sse2, 12), SubpelAvgVarianceParams(4, 3, &aom_highbd_12_sub_pixel_avg_variance16x8_sse2, 12), SubpelAvgVarianceParams(3, 4, &aom_highbd_12_sub_pixel_avg_variance8x16_sse2, 12), SubpelAvgVarianceParams(3, 3, &aom_highbd_12_sub_pixel_avg_variance8x8_sse2, 12), SubpelAvgVarianceParams(3, 2, &aom_highbd_12_sub_pixel_avg_variance8x4_sse2, 12), SubpelAvgVarianceParams(6, 6, &aom_highbd_10_sub_pixel_avg_variance64x64_sse2, 10), SubpelAvgVarianceParams(6, 5, &aom_highbd_10_sub_pixel_avg_variance64x32_sse2, 10), SubpelAvgVarianceParams(5, 6, &aom_highbd_10_sub_pixel_avg_variance32x64_sse2, 10), SubpelAvgVarianceParams(5, 5, &aom_highbd_10_sub_pixel_avg_variance32x32_sse2, 10), SubpelAvgVarianceParams(5, 4, &aom_highbd_10_sub_pixel_avg_variance32x16_sse2, 10), SubpelAvgVarianceParams(4, 5, &aom_highbd_10_sub_pixel_avg_variance16x32_sse2, 10), SubpelAvgVarianceParams(4, 4, 
&aom_highbd_10_sub_pixel_avg_variance16x16_sse2, 10), SubpelAvgVarianceParams(4, 3, &aom_highbd_10_sub_pixel_avg_variance16x8_sse2, 10), SubpelAvgVarianceParams(3, 4, &aom_highbd_10_sub_pixel_avg_variance8x16_sse2, 10), SubpelAvgVarianceParams(3, 3, &aom_highbd_10_sub_pixel_avg_variance8x8_sse2, 10), SubpelAvgVarianceParams(3, 2, &aom_highbd_10_sub_pixel_avg_variance8x4_sse2, 10), SubpelAvgVarianceParams(6, 6, &aom_highbd_8_sub_pixel_avg_variance64x64_sse2, 8), SubpelAvgVarianceParams(6, 5, &aom_highbd_8_sub_pixel_avg_variance64x32_sse2, 8), SubpelAvgVarianceParams(5, 6, &aom_highbd_8_sub_pixel_avg_variance32x64_sse2, 8), SubpelAvgVarianceParams(5, 5, &aom_highbd_8_sub_pixel_avg_variance32x32_sse2, 8), SubpelAvgVarianceParams(5, 4, &aom_highbd_8_sub_pixel_avg_variance32x16_sse2, 8), SubpelAvgVarianceParams(4, 5, &aom_highbd_8_sub_pixel_avg_variance16x32_sse2, 8), SubpelAvgVarianceParams(4, 4, &aom_highbd_8_sub_pixel_avg_variance16x16_sse2, 8), SubpelAvgVarianceParams(4, 3, &aom_highbd_8_sub_pixel_avg_variance16x8_sse2, 8), SubpelAvgVarianceParams(3, 4, &aom_highbd_8_sub_pixel_avg_variance8x16_sse2, 8), SubpelAvgVarianceParams(3, 3, &aom_highbd_8_sub_pixel_avg_variance8x8_sse2, 8), SubpelAvgVarianceParams(3, 2, &aom_highbd_8_sub_pixel_avg_variance8x4_sse2, 8), #if !CONFIG_REALTIME_ONLY SubpelAvgVarianceParams(6, 4, &aom_highbd_12_sub_pixel_avg_variance64x16_sse2, 12), SubpelAvgVarianceParams(4, 6, &aom_highbd_12_sub_pixel_avg_variance16x64_sse2, 12), SubpelAvgVarianceParams(5, 3, &aom_highbd_12_sub_pixel_avg_variance32x8_sse2, 12), SubpelAvgVarianceParams(3, 5, &aom_highbd_12_sub_pixel_avg_variance8x32_sse2, 12), SubpelAvgVarianceParams(4, 2, &aom_highbd_12_sub_pixel_avg_variance16x4_sse2, 12), // SubpelAvgVarianceParams(2, 4, // &aom_highbd_12_sub_pixel_avg_variance4x16_sse2, 12), SubpelAvgVarianceParams(6, 4, &aom_highbd_10_sub_pixel_avg_variance64x16_sse2, 10), SubpelAvgVarianceParams(4, 6, &aom_highbd_10_sub_pixel_avg_variance16x64_sse2, 10), 
SubpelAvgVarianceParams(5, 3, &aom_highbd_10_sub_pixel_avg_variance32x8_sse2, 10), SubpelAvgVarianceParams(3, 5, &aom_highbd_10_sub_pixel_avg_variance8x32_sse2, 10), SubpelAvgVarianceParams(4, 2, &aom_highbd_10_sub_pixel_avg_variance16x4_sse2, 10), // SubpelAvgVarianceParams(2, 4, // &aom_highbd_10_sub_pixel_avg_variance4x16_sse2, 10), SubpelAvgVarianceParams(6, 4, &aom_highbd_8_sub_pixel_avg_variance64x16_sse2, 8), SubpelAvgVarianceParams(4, 6, &aom_highbd_8_sub_pixel_avg_variance16x64_sse2, 8), SubpelAvgVarianceParams(5, 3, &aom_highbd_8_sub_pixel_avg_variance32x8_sse2, 8), SubpelAvgVarianceParams(3, 5, &aom_highbd_8_sub_pixel_avg_variance8x32_sse2, 8), SubpelAvgVarianceParams(4, 2, &aom_highbd_8_sub_pixel_avg_variance16x4_sse2, 8), // SubpelAvgVarianceParams(2, 4, // &aom_highbd_8_sub_pixel_avg_variance4x16_sse2, 8), #endif }; INSTANTIATE_TEST_SUITE_P(SSE2, AvxHBDSubpelAvgVarianceTest, ::testing::ValuesIn(kArrayHBDSubpelAvgVariance_sse2)); #endif // HAVE_SSE2 #endif // CONFIG_AV1_HIGHBITDEPTH #if HAVE_SSSE3 const SubpelVarianceParams kArraySubpelVariance_ssse3[] = { SubpelVarianceParams(7, 7, &aom_sub_pixel_variance128x128_ssse3, 0), SubpelVarianceParams(7, 6, &aom_sub_pixel_variance128x64_ssse3, 0), SubpelVarianceParams(6, 7, &aom_sub_pixel_variance64x128_ssse3, 0), SubpelVarianceParams(6, 6, &aom_sub_pixel_variance64x64_ssse3, 0), SubpelVarianceParams(6, 5, &aom_sub_pixel_variance64x32_ssse3, 0), SubpelVarianceParams(5, 6, &aom_sub_pixel_variance32x64_ssse3, 0), SubpelVarianceParams(5, 5, &aom_sub_pixel_variance32x32_ssse3, 0), SubpelVarianceParams(5, 4, &aom_sub_pixel_variance32x16_ssse3, 0), SubpelVarianceParams(4, 5, &aom_sub_pixel_variance16x32_ssse3, 0), SubpelVarianceParams(4, 4, &aom_sub_pixel_variance16x16_ssse3, 0), SubpelVarianceParams(4, 3, &aom_sub_pixel_variance16x8_ssse3, 0), SubpelVarianceParams(3, 4, &aom_sub_pixel_variance8x16_ssse3, 0), SubpelVarianceParams(3, 3, &aom_sub_pixel_variance8x8_ssse3, 0), SubpelVarianceParams(3, 2, 
&aom_sub_pixel_variance8x4_ssse3, 0), SubpelVarianceParams(2, 3, &aom_sub_pixel_variance4x8_ssse3, 0), SubpelVarianceParams(2, 2, &aom_sub_pixel_variance4x4_ssse3, 0), #if !CONFIG_REALTIME_ONLY SubpelVarianceParams(6, 4, &aom_sub_pixel_variance64x16_ssse3, 0), SubpelVarianceParams(4, 6, &aom_sub_pixel_variance16x64_ssse3, 0), SubpelVarianceParams(5, 3, &aom_sub_pixel_variance32x8_ssse3, 0), SubpelVarianceParams(3, 5, &aom_sub_pixel_variance8x32_ssse3, 0), SubpelVarianceParams(4, 2, &aom_sub_pixel_variance16x4_ssse3, 0), SubpelVarianceParams(2, 4, &aom_sub_pixel_variance4x16_ssse3, 0), #endif }; INSTANTIATE_TEST_SUITE_P(SSSE3, AvxSubpelVarianceTest, ::testing::ValuesIn(kArraySubpelVariance_ssse3)); const SubpelAvgVarianceParams kArraySubpelAvgVariance_ssse3[] = { SubpelAvgVarianceParams(7, 7, &aom_sub_pixel_avg_variance128x128_ssse3, 0), SubpelAvgVarianceParams(7, 6, &aom_sub_pixel_avg_variance128x64_ssse3, 0), SubpelAvgVarianceParams(6, 7, &aom_sub_pixel_avg_variance64x128_ssse3, 0), SubpelAvgVarianceParams(6, 6, &aom_sub_pixel_avg_variance64x64_ssse3, 0), SubpelAvgVarianceParams(6, 5, &aom_sub_pixel_avg_variance64x32_ssse3, 0), SubpelAvgVarianceParams(5, 6, &aom_sub_pixel_avg_variance32x64_ssse3, 0), SubpelAvgVarianceParams(5, 5, &aom_sub_pixel_avg_variance32x32_ssse3, 0), SubpelAvgVarianceParams(5, 4, &aom_sub_pixel_avg_variance32x16_ssse3, 0), SubpelAvgVarianceParams(4, 5, &aom_sub_pixel_avg_variance16x32_ssse3, 0), SubpelAvgVarianceParams(4, 4, &aom_sub_pixel_avg_variance16x16_ssse3, 0), SubpelAvgVarianceParams(4, 3, &aom_sub_pixel_avg_variance16x8_ssse3, 0), SubpelAvgVarianceParams(3, 4, &aom_sub_pixel_avg_variance8x16_ssse3, 0), SubpelAvgVarianceParams(3, 3, &aom_sub_pixel_avg_variance8x8_ssse3, 0), SubpelAvgVarianceParams(3, 2, &aom_sub_pixel_avg_variance8x4_ssse3, 0), SubpelAvgVarianceParams(2, 3, &aom_sub_pixel_avg_variance4x8_ssse3, 0), SubpelAvgVarianceParams(2, 2, &aom_sub_pixel_avg_variance4x4_ssse3, 0), #if !CONFIG_REALTIME_ONLY 
SubpelAvgVarianceParams(6, 4, &aom_sub_pixel_avg_variance64x16_ssse3, 0), SubpelAvgVarianceParams(4, 6, &aom_sub_pixel_avg_variance16x64_ssse3, 0), SubpelAvgVarianceParams(5, 3, &aom_sub_pixel_avg_variance32x8_ssse3, 0), SubpelAvgVarianceParams(3, 5, &aom_sub_pixel_avg_variance8x32_ssse3, 0), SubpelAvgVarianceParams(4, 2, &aom_sub_pixel_avg_variance16x4_ssse3, 0), SubpelAvgVarianceParams(2, 4, &aom_sub_pixel_avg_variance4x16_ssse3, 0), #endif }; INSTANTIATE_TEST_SUITE_P(SSSE3, AvxSubpelAvgVarianceTest, ::testing::ValuesIn(kArraySubpelAvgVariance_ssse3)); const DistWtdSubpelAvgVarianceParams kArrayDistWtdSubpelAvgVariance_ssse3[] = { DistWtdSubpelAvgVarianceParams( 7, 7, &aom_dist_wtd_sub_pixel_avg_variance128x128_ssse3, 0), DistWtdSubpelAvgVarianceParams( 7, 6, &aom_dist_wtd_sub_pixel_avg_variance128x64_ssse3, 0), DistWtdSubpelAvgVarianceParams( 6, 7, &aom_dist_wtd_sub_pixel_avg_variance64x128_ssse3, 0), DistWtdSubpelAvgVarianceParams( 6, 6, &aom_dist_wtd_sub_pixel_avg_variance64x64_ssse3, 0), DistWtdSubpelAvgVarianceParams( 6, 5, &aom_dist_wtd_sub_pixel_avg_variance64x32_ssse3, 0), DistWtdSubpelAvgVarianceParams( 5, 6, &aom_dist_wtd_sub_pixel_avg_variance32x64_ssse3, 0), DistWtdSubpelAvgVarianceParams( 5, 5, &aom_dist_wtd_sub_pixel_avg_variance32x32_ssse3, 0), DistWtdSubpelAvgVarianceParams( 5, 4, &aom_dist_wtd_sub_pixel_avg_variance32x16_ssse3, 0), DistWtdSubpelAvgVarianceParams( 4, 5, &aom_dist_wtd_sub_pixel_avg_variance16x32_ssse3, 0), DistWtdSubpelAvgVarianceParams( 4, 4, &aom_dist_wtd_sub_pixel_avg_variance16x16_ssse3, 0), DistWtdSubpelAvgVarianceParams( 4, 3, &aom_dist_wtd_sub_pixel_avg_variance16x8_ssse3, 0), DistWtdSubpelAvgVarianceParams( 3, 4, &aom_dist_wtd_sub_pixel_avg_variance8x16_ssse3, 0), DistWtdSubpelAvgVarianceParams( 3, 3, &aom_dist_wtd_sub_pixel_avg_variance8x8_ssse3, 0), DistWtdSubpelAvgVarianceParams( 3, 2, &aom_dist_wtd_sub_pixel_avg_variance8x4_ssse3, 0), DistWtdSubpelAvgVarianceParams( 2, 3, &aom_dist_wtd_sub_pixel_avg_variance4x8_ssse3, 
0), DistWtdSubpelAvgVarianceParams( 2, 2, &aom_dist_wtd_sub_pixel_avg_variance4x4_ssse3, 0), #if !CONFIG_REALTIME_ONLY DistWtdSubpelAvgVarianceParams( 6, 4, &aom_dist_wtd_sub_pixel_avg_variance64x16_ssse3, 0), DistWtdSubpelAvgVarianceParams( 4, 6, &aom_dist_wtd_sub_pixel_avg_variance16x64_ssse3, 0), DistWtdSubpelAvgVarianceParams( 5, 3, &aom_dist_wtd_sub_pixel_avg_variance32x8_ssse3, 0), DistWtdSubpelAvgVarianceParams( 3, 5, &aom_dist_wtd_sub_pixel_avg_variance8x32_ssse3, 0), DistWtdSubpelAvgVarianceParams( 4, 2, &aom_dist_wtd_sub_pixel_avg_variance16x4_ssse3, 0), DistWtdSubpelAvgVarianceParams( 2, 4, &aom_dist_wtd_sub_pixel_avg_variance4x16_ssse3, 0), #endif }; INSTANTIATE_TEST_SUITE_P( SSSE3, AvxDistWtdSubpelAvgVarianceTest, ::testing::ValuesIn(kArrayDistWtdSubpelAvgVariance_ssse3)); #endif // HAVE_SSSE3 #if HAVE_SSE4_1 #if !CONFIG_REALTIME_ONLY INSTANTIATE_TEST_SUITE_P( SSE4_1, AvxObmcSubpelVarianceTest, ::testing::Values( ObmcSubpelVarianceParams(7, 7, &aom_obmc_sub_pixel_variance128x128_sse4_1, 0), ObmcSubpelVarianceParams(7, 6, &aom_obmc_sub_pixel_variance128x64_sse4_1, 0), ObmcSubpelVarianceParams(6, 7, &aom_obmc_sub_pixel_variance64x128_sse4_1, 0), ObmcSubpelVarianceParams(6, 6, &aom_obmc_sub_pixel_variance64x64_sse4_1, 0), ObmcSubpelVarianceParams(6, 5, &aom_obmc_sub_pixel_variance64x32_sse4_1, 0), ObmcSubpelVarianceParams(5, 6, &aom_obmc_sub_pixel_variance32x64_sse4_1, 0), ObmcSubpelVarianceParams(5, 5, &aom_obmc_sub_pixel_variance32x32_sse4_1, 0), ObmcSubpelVarianceParams(5, 4, &aom_obmc_sub_pixel_variance32x16_sse4_1, 0), ObmcSubpelVarianceParams(4, 5, &aom_obmc_sub_pixel_variance16x32_sse4_1, 0), ObmcSubpelVarianceParams(4, 4, &aom_obmc_sub_pixel_variance16x16_sse4_1, 0), ObmcSubpelVarianceParams(4, 3, &aom_obmc_sub_pixel_variance16x8_sse4_1, 0), ObmcSubpelVarianceParams(3, 4, &aom_obmc_sub_pixel_variance8x16_sse4_1, 0), ObmcSubpelVarianceParams(3, 3, &aom_obmc_sub_pixel_variance8x8_sse4_1, 0), ObmcSubpelVarianceParams(3, 2, 
&aom_obmc_sub_pixel_variance8x4_sse4_1, 0), ObmcSubpelVarianceParams(2, 3, &aom_obmc_sub_pixel_variance4x8_sse4_1, 0), ObmcSubpelVarianceParams(2, 2, &aom_obmc_sub_pixel_variance4x4_sse4_1, 0), ObmcSubpelVarianceParams(6, 4, &aom_obmc_sub_pixel_variance64x16_sse4_1, 0), ObmcSubpelVarianceParams(4, 6, &aom_obmc_sub_pixel_variance16x64_sse4_1, 0), ObmcSubpelVarianceParams(5, 3, &aom_obmc_sub_pixel_variance32x8_sse4_1, 0), ObmcSubpelVarianceParams(3, 5, &aom_obmc_sub_pixel_variance8x32_sse4_1, 0), ObmcSubpelVarianceParams(4, 2, &aom_obmc_sub_pixel_variance16x4_sse4_1, 0), ObmcSubpelVarianceParams(2, 4, &aom_obmc_sub_pixel_variance4x16_sse4_1, 0))); #endif #endif // HAVE_SSE4_1 #if HAVE_AVX2 INSTANTIATE_TEST_SUITE_P( AVX2, MseWxHTest, ::testing::Values(MseWxHParams(3, 3, &aom_mse_wxh_16bit_avx2, 8), MseWxHParams(3, 2, &aom_mse_wxh_16bit_avx2, 8), MseWxHParams(2, 3, &aom_mse_wxh_16bit_avx2, 8), MseWxHParams(2, 2, &aom_mse_wxh_16bit_avx2, 8))); INSTANTIATE_TEST_SUITE_P( AVX2, Mse16xHTest, ::testing::Values(Mse16xHParams(3, 3, &aom_mse_16xh_16bit_avx2, 8), Mse16xHParams(3, 2, &aom_mse_16xh_16bit_avx2, 8), Mse16xHParams(2, 3, &aom_mse_16xh_16bit_avx2, 8), Mse16xHParams(2, 2, &aom_mse_16xh_16bit_avx2, 8))); INSTANTIATE_TEST_SUITE_P(AVX2, AvxMseTest, ::testing::Values(MseParams(4, 4, &aom_mse16x16_avx2))); const VarianceParams kArrayVariance_avx2[] = { VarianceParams(7, 7, &aom_variance128x128_avx2), VarianceParams(7, 6, &aom_variance128x64_avx2), VarianceParams(6, 7, &aom_variance64x128_avx2), VarianceParams(6, 6, &aom_variance64x64_avx2), VarianceParams(6, 5, &aom_variance64x32_avx2), VarianceParams(5, 6, &aom_variance32x64_avx2), VarianceParams(5, 5, &aom_variance32x32_avx2), VarianceParams(5, 4, &aom_variance32x16_avx2), VarianceParams(4, 5, &aom_variance16x32_avx2), VarianceParams(4, 4, &aom_variance16x16_avx2), VarianceParams(4, 3, &aom_variance16x8_avx2), #if !CONFIG_REALTIME_ONLY VarianceParams(6, 4, &aom_variance64x16_avx2), VarianceParams(4, 6, 
&aom_variance16x64_avx2), VarianceParams(5, 3, &aom_variance32x8_avx2), VarianceParams(4, 2, &aom_variance16x4_avx2), #endif }; INSTANTIATE_TEST_SUITE_P(AVX2, AvxVarianceTest, ::testing::ValuesIn(kArrayVariance_avx2)); const GetSseSumParams kArrayGetSseSum8x8Quad_avx2[] = { GetSseSumParams(7, 7, &aom_get_var_sse_sum_8x8_quad_avx2, 0), GetSseSumParams(6, 6, &aom_get_var_sse_sum_8x8_quad_avx2, 0), GetSseSumParams(5, 5, &aom_get_var_sse_sum_8x8_quad_avx2, 0), GetSseSumParams(5, 4, &aom_get_var_sse_sum_8x8_quad_avx2, 0) }; INSTANTIATE_TEST_SUITE_P(AVX2, GetSseSum8x8QuadTest, ::testing::ValuesIn(kArrayGetSseSum8x8Quad_avx2)); const GetSseSumParamsDual kArrayGetSseSum16x16Dual_avx2[] = { GetSseSumParamsDual(7, 7, &aom_get_var_sse_sum_16x16_dual_avx2, 0), GetSseSumParamsDual(6, 6, &aom_get_var_sse_sum_16x16_dual_avx2, 0), GetSseSumParamsDual(5, 5, &aom_get_var_sse_sum_16x16_dual_avx2, 0), GetSseSumParamsDual(5, 4, &aom_get_var_sse_sum_16x16_dual_avx2, 0) }; INSTANTIATE_TEST_SUITE_P(AVX2, GetSseSum16x16DualTest, ::testing::ValuesIn(kArrayGetSseSum16x16Dual_avx2)); const SubpelVarianceParams kArraySubpelVariance_avx2[] = { SubpelVarianceParams(7, 7, &aom_sub_pixel_variance128x128_avx2, 0), SubpelVarianceParams(7, 6, &aom_sub_pixel_variance128x64_avx2, 0), SubpelVarianceParams(6, 7, &aom_sub_pixel_variance64x128_avx2, 0), SubpelVarianceParams(6, 6, &aom_sub_pixel_variance64x64_avx2, 0), SubpelVarianceParams(6, 5, &aom_sub_pixel_variance64x32_avx2, 0), SubpelVarianceParams(5, 6, &aom_sub_pixel_variance32x64_avx2, 0), SubpelVarianceParams(5, 5, &aom_sub_pixel_variance32x32_avx2, 0), SubpelVarianceParams(5, 4, &aom_sub_pixel_variance32x16_avx2, 0), SubpelVarianceParams(4, 5, &aom_sub_pixel_variance16x32_avx2, 0), SubpelVarianceParams(4, 4, &aom_sub_pixel_variance16x16_avx2, 0), SubpelVarianceParams(4, 3, &aom_sub_pixel_variance16x8_avx2, 0), #if !CONFIG_REALTIME_ONLY SubpelVarianceParams(4, 6, &aom_sub_pixel_variance16x64_avx2, 0), SubpelVarianceParams(4, 2, 
&aom_sub_pixel_variance16x4_avx2, 0), #endif }; INSTANTIATE_TEST_SUITE_P(AVX2, AvxSubpelVarianceTest, ::testing::ValuesIn(kArraySubpelVariance_avx2)); INSTANTIATE_TEST_SUITE_P( AVX2, AvxSubpelAvgVarianceTest, ::testing::Values( SubpelAvgVarianceParams(7, 7, &aom_sub_pixel_avg_variance128x128_avx2, 0), SubpelAvgVarianceParams(7, 6, &aom_sub_pixel_avg_variance128x64_avx2, 0), SubpelAvgVarianceParams(6, 7, &aom_sub_pixel_avg_variance64x128_avx2, 0), SubpelAvgVarianceParams(6, 6, &aom_sub_pixel_avg_variance64x64_avx2, 0), SubpelAvgVarianceParams(6, 5, &aom_sub_pixel_avg_variance64x32_avx2, 0), SubpelAvgVarianceParams(5, 6, &aom_sub_pixel_avg_variance32x64_avx2, 0), SubpelAvgVarianceParams(5, 5, &aom_sub_pixel_avg_variance32x32_avx2, 0), SubpelAvgVarianceParams(5, 4, &aom_sub_pixel_avg_variance32x16_avx2, 0))); #endif // HAVE_AVX2 #if HAVE_NEON INSTANTIATE_TEST_SUITE_P( NEON, MseWxHTest, ::testing::Values(MseWxHParams(3, 3, &aom_mse_wxh_16bit_neon, 8), MseWxHParams(3, 2, &aom_mse_wxh_16bit_neon, 8), MseWxHParams(2, 3, &aom_mse_wxh_16bit_neon, 8), MseWxHParams(2, 2, &aom_mse_wxh_16bit_neon, 8))); INSTANTIATE_TEST_SUITE_P( NEON, Mse16xHTest, ::testing::Values(Mse16xHParams(3, 3, &aom_mse_16xh_16bit_neon, 8), Mse16xHParams(3, 2, &aom_mse_16xh_16bit_neon, 8), Mse16xHParams(2, 3, &aom_mse_16xh_16bit_neon, 8), Mse16xHParams(2, 2, &aom_mse_16xh_16bit_neon, 8))); INSTANTIATE_TEST_SUITE_P(NEON, SumOfSquaresTest, ::testing::Values(aom_get_mb_ss_neon)); INSTANTIATE_TEST_SUITE_P(NEON, AvxMseTest, ::testing::Values(MseParams(3, 3, &aom_mse8x8_neon), MseParams(3, 4, &aom_mse8x16_neon), MseParams(4, 4, &aom_mse16x16_neon), MseParams(4, 3, &aom_mse16x8_neon))); const VarianceParams kArrayVariance_neon[] = { VarianceParams(7, 7, &aom_variance128x128_neon), VarianceParams(6, 6, &aom_variance64x64_neon), VarianceParams(7, 6, &aom_variance128x64_neon), VarianceParams(6, 7, &aom_variance64x128_neon), VarianceParams(6, 6, &aom_variance64x64_neon), VarianceParams(6, 5, 
&aom_variance64x32_neon), VarianceParams(5, 6, &aom_variance32x64_neon), VarianceParams(5, 5, &aom_variance32x32_neon), VarianceParams(5, 4, &aom_variance32x16_neon), VarianceParams(4, 5, &aom_variance16x32_neon), VarianceParams(4, 4, &aom_variance16x16_neon), VarianceParams(4, 3, &aom_variance16x8_neon), VarianceParams(3, 4, &aom_variance8x16_neon), VarianceParams(3, 3, &aom_variance8x8_neon), VarianceParams(3, 2, &aom_variance8x4_neon), VarianceParams(2, 3, &aom_variance4x8_neon), VarianceParams(2, 2, &aom_variance4x4_neon), #if !CONFIG_REALTIME_ONLY VarianceParams(2, 4, &aom_variance4x16_neon), VarianceParams(4, 2, &aom_variance16x4_neon), VarianceParams(3, 5, &aom_variance8x32_neon), VarianceParams(5, 3, &aom_variance32x8_neon), VarianceParams(4, 6, &aom_variance16x64_neon), VarianceParams(6, 4, &aom_variance64x16_neon), #endif }; INSTANTIATE_TEST_SUITE_P(NEON, AvxVarianceTest, ::testing::ValuesIn(kArrayVariance_neon)); const SubpelVarianceParams kArraySubpelVariance_neon[] = { SubpelVarianceParams(7, 7, &aom_sub_pixel_variance128x128_neon, 0), SubpelVarianceParams(7, 6, &aom_sub_pixel_variance128x64_neon, 0), SubpelVarianceParams(6, 7, &aom_sub_pixel_variance64x128_neon, 0), SubpelVarianceParams(6, 6, &aom_sub_pixel_variance64x64_neon, 0), SubpelVarianceParams(6, 5, &aom_sub_pixel_variance64x32_neon, 0), SubpelVarianceParams(5, 6, &aom_sub_pixel_variance32x64_neon, 0), SubpelVarianceParams(5, 5, &aom_sub_pixel_variance32x32_neon, 0), SubpelVarianceParams(5, 4, &aom_sub_pixel_variance32x16_neon, 0), SubpelVarianceParams(4, 5, &aom_sub_pixel_variance16x32_neon, 0), SubpelVarianceParams(4, 4, &aom_sub_pixel_variance16x16_neon, 0), SubpelVarianceParams(4, 3, &aom_sub_pixel_variance16x8_neon, 0), SubpelVarianceParams(3, 4, &aom_sub_pixel_variance8x16_neon, 0), SubpelVarianceParams(3, 3, &aom_sub_pixel_variance8x8_neon, 0), SubpelVarianceParams(3, 2, &aom_sub_pixel_variance8x4_neon, 0), SubpelVarianceParams(2, 3, &aom_sub_pixel_variance4x8_neon, 0), 
SubpelVarianceParams(2, 2, &aom_sub_pixel_variance4x4_neon, 0), #if !CONFIG_REALTIME_ONLY SubpelVarianceParams(6, 4, &aom_sub_pixel_variance64x16_neon, 0), SubpelVarianceParams(4, 6, &aom_sub_pixel_variance16x64_neon, 0), SubpelVarianceParams(5, 3, &aom_sub_pixel_variance32x8_neon, 0), SubpelVarianceParams(3, 5, &aom_sub_pixel_variance8x32_neon, 0), SubpelVarianceParams(4, 2, &aom_sub_pixel_variance16x4_neon, 0), SubpelVarianceParams(2, 4, &aom_sub_pixel_variance4x16_neon, 0), #endif }; INSTANTIATE_TEST_SUITE_P(NEON, AvxSubpelVarianceTest, ::testing::ValuesIn(kArraySubpelVariance_neon)); const SubpelAvgVarianceParams kArraySubpelAvgVariance_neon[] = { SubpelAvgVarianceParams(7, 7, &aom_sub_pixel_avg_variance128x128_neon, 0), SubpelAvgVarianceParams(7, 6, &aom_sub_pixel_avg_variance128x64_neon, 0), SubpelAvgVarianceParams(6, 7, &aom_sub_pixel_avg_variance64x128_neon, 0), SubpelAvgVarianceParams(6, 6, &aom_sub_pixel_avg_variance64x64_neon, 0), SubpelAvgVarianceParams(6, 5, &aom_sub_pixel_avg_variance64x32_neon, 0), SubpelAvgVarianceParams(5, 6, &aom_sub_pixel_avg_variance32x64_neon, 0), SubpelAvgVarianceParams(5, 5, &aom_sub_pixel_avg_variance32x32_neon, 0), SubpelAvgVarianceParams(5, 4, &aom_sub_pixel_avg_variance32x16_neon, 0), SubpelAvgVarianceParams(4, 5, &aom_sub_pixel_avg_variance16x32_neon, 0), SubpelAvgVarianceParams(4, 4, &aom_sub_pixel_avg_variance16x16_neon, 0), SubpelAvgVarianceParams(4, 3, &aom_sub_pixel_avg_variance16x8_neon, 0), SubpelAvgVarianceParams(3, 4, &aom_sub_pixel_avg_variance8x16_neon, 0), SubpelAvgVarianceParams(3, 3, &aom_sub_pixel_avg_variance8x8_neon, 0), SubpelAvgVarianceParams(3, 2, &aom_sub_pixel_avg_variance8x4_neon, 0), SubpelAvgVarianceParams(2, 3, &aom_sub_pixel_avg_variance4x8_neon, 0), SubpelAvgVarianceParams(2, 2, &aom_sub_pixel_avg_variance4x4_neon, 0), #if !CONFIG_REALTIME_ONLY SubpelAvgVarianceParams(6, 4, &aom_sub_pixel_avg_variance64x16_neon, 0), SubpelAvgVarianceParams(4, 6, &aom_sub_pixel_avg_variance16x64_neon, 0), 
SubpelAvgVarianceParams(5, 3, &aom_sub_pixel_avg_variance32x8_neon, 0), SubpelAvgVarianceParams(3, 5, &aom_sub_pixel_avg_variance8x32_neon, 0), SubpelAvgVarianceParams(4, 2, &aom_sub_pixel_avg_variance16x4_neon, 0), SubpelAvgVarianceParams(2, 4, &aom_sub_pixel_avg_variance4x16_neon, 0), #endif }; INSTANTIATE_TEST_SUITE_P(NEON, AvxSubpelAvgVarianceTest, ::testing::ValuesIn(kArraySubpelAvgVariance_neon)); const DistWtdSubpelAvgVarianceParams kArrayDistWtdSubpelAvgVariance_neon[] = { DistWtdSubpelAvgVarianceParams( 6, 6, &aom_dist_wtd_sub_pixel_avg_variance64x64_neon, 0), DistWtdSubpelAvgVarianceParams( 6, 5, &aom_dist_wtd_sub_pixel_avg_variance64x32_neon, 0), DistWtdSubpelAvgVarianceParams( 5, 6, &aom_dist_wtd_sub_pixel_avg_variance32x64_neon, 0), DistWtdSubpelAvgVarianceParams( 5, 5, &aom_dist_wtd_sub_pixel_avg_variance32x32_neon, 0), DistWtdSubpelAvgVarianceParams( 5, 4, &aom_dist_wtd_sub_pixel_avg_variance32x16_neon, 0), DistWtdSubpelAvgVarianceParams( 4, 5, &aom_dist_wtd_sub_pixel_avg_variance16x32_neon, 0), DistWtdSubpelAvgVarianceParams( 4, 4, &aom_dist_wtd_sub_pixel_avg_variance16x16_neon, 0), DistWtdSubpelAvgVarianceParams( 4, 3, &aom_dist_wtd_sub_pixel_avg_variance16x8_neon, 0), DistWtdSubpelAvgVarianceParams( 3, 4, &aom_dist_wtd_sub_pixel_avg_variance8x16_neon, 0), DistWtdSubpelAvgVarianceParams( 3, 3, &aom_dist_wtd_sub_pixel_avg_variance8x8_neon, 0), DistWtdSubpelAvgVarianceParams( 3, 2, &aom_dist_wtd_sub_pixel_avg_variance8x4_neon, 0), DistWtdSubpelAvgVarianceParams( 2, 3, &aom_dist_wtd_sub_pixel_avg_variance4x8_neon, 0), DistWtdSubpelAvgVarianceParams( 2, 2, &aom_dist_wtd_sub_pixel_avg_variance4x4_neon, 0), #if !CONFIG_REALTIME_ONLY DistWtdSubpelAvgVarianceParams( 6, 4, &aom_dist_wtd_sub_pixel_avg_variance64x16_neon, 0), DistWtdSubpelAvgVarianceParams( 4, 6, &aom_dist_wtd_sub_pixel_avg_variance16x64_neon, 0), DistWtdSubpelAvgVarianceParams( 5, 3, &aom_dist_wtd_sub_pixel_avg_variance32x8_neon, 0), DistWtdSubpelAvgVarianceParams( 3, 5, 
&aom_dist_wtd_sub_pixel_avg_variance8x32_neon, 0), DistWtdSubpelAvgVarianceParams( 4, 2, &aom_dist_wtd_sub_pixel_avg_variance16x4_neon, 0), DistWtdSubpelAvgVarianceParams( 2, 4, &aom_dist_wtd_sub_pixel_avg_variance4x16_neon, 0), #endif // !CONFIG_REALTIME_ONLY }; INSTANTIATE_TEST_SUITE_P( NEON, AvxDistWtdSubpelAvgVarianceTest, ::testing::ValuesIn(kArrayDistWtdSubpelAvgVariance_neon)); #if !CONFIG_REALTIME_ONLY const ObmcSubpelVarianceParams kArrayObmcSubpelVariance_neon[] = { ObmcSubpelVarianceParams(7, 7, &aom_obmc_sub_pixel_variance128x128_neon, 0), ObmcSubpelVarianceParams(7, 6, &aom_obmc_sub_pixel_variance128x64_neon, 0), ObmcSubpelVarianceParams(6, 7, &aom_obmc_sub_pixel_variance64x128_neon, 0), ObmcSubpelVarianceParams(6, 6, &aom_obmc_sub_pixel_variance64x64_neon, 0), ObmcSubpelVarianceParams(6, 5, &aom_obmc_sub_pixel_variance64x32_neon, 0), ObmcSubpelVarianceParams(5, 6, &aom_obmc_sub_pixel_variance32x64_neon, 0), ObmcSubpelVarianceParams(5, 5, &aom_obmc_sub_pixel_variance32x32_neon, 0), ObmcSubpelVarianceParams(5, 4, &aom_obmc_sub_pixel_variance32x16_neon, 0), ObmcSubpelVarianceParams(4, 5, &aom_obmc_sub_pixel_variance16x32_neon, 0), ObmcSubpelVarianceParams(4, 4, &aom_obmc_sub_pixel_variance16x16_neon, 0), ObmcSubpelVarianceParams(4, 3, &aom_obmc_sub_pixel_variance16x8_neon, 0), ObmcSubpelVarianceParams(3, 4, &aom_obmc_sub_pixel_variance8x16_neon, 0), ObmcSubpelVarianceParams(3, 3, &aom_obmc_sub_pixel_variance8x8_neon, 0), ObmcSubpelVarianceParams(3, 2, &aom_obmc_sub_pixel_variance8x4_neon, 0), ObmcSubpelVarianceParams(2, 3, &aom_obmc_sub_pixel_variance4x8_neon, 0), ObmcSubpelVarianceParams(2, 2, &aom_obmc_sub_pixel_variance4x4_neon, 0), ObmcSubpelVarianceParams(6, 4, &aom_obmc_sub_pixel_variance64x16_neon, 0), ObmcSubpelVarianceParams(4, 6, &aom_obmc_sub_pixel_variance16x64_neon, 0), ObmcSubpelVarianceParams(5, 3, &aom_obmc_sub_pixel_variance32x8_neon, 0), ObmcSubpelVarianceParams(3, 5, &aom_obmc_sub_pixel_variance8x32_neon, 0), 
ObmcSubpelVarianceParams(4, 2, &aom_obmc_sub_pixel_variance16x4_neon, 0), ObmcSubpelVarianceParams(2, 4, &aom_obmc_sub_pixel_variance4x16_neon, 0), }; INSTANTIATE_TEST_SUITE_P(NEON, AvxObmcSubpelVarianceTest, ::testing::ValuesIn(kArrayObmcSubpelVariance_neon)); #endif const GetSseSumParams kArrayGetSseSum8x8Quad_neon[] = { GetSseSumParams(7, 7, &aom_get_var_sse_sum_8x8_quad_neon, 0), GetSseSumParams(6, 6, &aom_get_var_sse_sum_8x8_quad_neon, 0), GetSseSumParams(5, 5, &aom_get_var_sse_sum_8x8_quad_neon, 0), GetSseSumParams(5, 4, &aom_get_var_sse_sum_8x8_quad_neon, 0) }; INSTANTIATE_TEST_SUITE_P(NEON, GetSseSum8x8QuadTest, ::testing::ValuesIn(kArrayGetSseSum8x8Quad_neon)); const GetSseSumParamsDual kArrayGetSseSum16x16Dual_neon[] = { GetSseSumParamsDual(7, 7, &aom_get_var_sse_sum_16x16_dual_neon, 0), GetSseSumParamsDual(6, 6, &aom_get_var_sse_sum_16x16_dual_neon, 0), GetSseSumParamsDual(5, 5, &aom_get_var_sse_sum_16x16_dual_neon, 0), GetSseSumParamsDual(5, 4, &aom_get_var_sse_sum_16x16_dual_neon, 0) }; INSTANTIATE_TEST_SUITE_P(NEON, GetSseSum16x16DualTest, ::testing::ValuesIn(kArrayGetSseSum16x16Dual_neon)); #if CONFIG_AV1_HIGHBITDEPTH const VarianceParams kArrayHBDVariance_neon[] = { VarianceParams(7, 7, &aom_highbd_12_variance128x128_neon, 12), VarianceParams(7, 6, &aom_highbd_12_variance128x64_neon, 12), VarianceParams(6, 7, &aom_highbd_12_variance64x128_neon, 12), VarianceParams(6, 6, &aom_highbd_12_variance64x64_neon, 12), VarianceParams(6, 5, &aom_highbd_12_variance64x32_neon, 12), VarianceParams(5, 6, &aom_highbd_12_variance32x64_neon, 12), VarianceParams(5, 5, &aom_highbd_12_variance32x32_neon, 12), VarianceParams(5, 4, &aom_highbd_12_variance32x16_neon, 12), VarianceParams(4, 5, &aom_highbd_12_variance16x32_neon, 12), VarianceParams(4, 4, &aom_highbd_12_variance16x16_neon, 12), VarianceParams(4, 3, &aom_highbd_12_variance16x8_neon, 12), VarianceParams(3, 4, &aom_highbd_12_variance8x16_neon, 12), VarianceParams(3, 3, &aom_highbd_12_variance8x8_neon, 12), 
VarianceParams(3, 2, &aom_highbd_12_variance8x4_neon, 12), VarianceParams(2, 3, &aom_highbd_12_variance4x8_neon, 12), VarianceParams(2, 2, &aom_highbd_12_variance4x4_neon, 12), VarianceParams(7, 7, &aom_highbd_10_variance128x128_neon, 10), VarianceParams(7, 6, &aom_highbd_10_variance128x64_neon, 10), VarianceParams(6, 7, &aom_highbd_10_variance64x128_neon, 10), VarianceParams(6, 6, &aom_highbd_10_variance64x64_neon, 10), VarianceParams(6, 5, &aom_highbd_10_variance64x32_neon, 10), VarianceParams(5, 6, &aom_highbd_10_variance32x64_neon, 10), VarianceParams(5, 5, &aom_highbd_10_variance32x32_neon, 10), VarianceParams(5, 4, &aom_highbd_10_variance32x16_neon, 10), VarianceParams(4, 5, &aom_highbd_10_variance16x32_neon, 10), VarianceParams(4, 4, &aom_highbd_10_variance16x16_neon, 10), VarianceParams(4, 3, &aom_highbd_10_variance16x8_neon, 10), VarianceParams(3, 4, &aom_highbd_10_variance8x16_neon, 10), VarianceParams(3, 3, &aom_highbd_10_variance8x8_neon, 10), VarianceParams(3, 2, &aom_highbd_10_variance8x4_neon, 10), VarianceParams(2, 3, &aom_highbd_10_variance4x8_neon, 10), VarianceParams(2, 2, &aom_highbd_10_variance4x4_neon, 10), VarianceParams(7, 7, &aom_highbd_8_variance128x128_neon, 8), VarianceParams(7, 6, &aom_highbd_8_variance128x64_neon, 8), VarianceParams(6, 7, &aom_highbd_8_variance64x128_neon, 8), VarianceParams(6, 6, &aom_highbd_8_variance64x64_neon, 8), VarianceParams(6, 5, &aom_highbd_8_variance64x32_neon, 8), VarianceParams(5, 6, &aom_highbd_8_variance32x64_neon, 8), VarianceParams(5, 5, &aom_highbd_8_variance32x32_neon, 8), VarianceParams(5, 4, &aom_highbd_8_variance32x16_neon, 8), VarianceParams(4, 5, &aom_highbd_8_variance16x32_neon, 8), VarianceParams(4, 4, &aom_highbd_8_variance16x16_neon, 8), VarianceParams(4, 3, &aom_highbd_8_variance16x8_neon, 8), VarianceParams(3, 4, &aom_highbd_8_variance8x16_neon, 8), VarianceParams(3, 3, &aom_highbd_8_variance8x8_neon, 8), VarianceParams(3, 2, &aom_highbd_8_variance8x4_neon, 8), VarianceParams(2, 3, 
&aom_highbd_8_variance4x8_neon, 8), VarianceParams(2, 2, &aom_highbd_8_variance4x4_neon, 8), #if !CONFIG_REALTIME_ONLY VarianceParams(6, 4, &aom_highbd_12_variance64x16_neon, 12), VarianceParams(4, 6, &aom_highbd_12_variance16x64_neon, 12), VarianceParams(5, 3, &aom_highbd_12_variance32x8_neon, 12), VarianceParams(3, 5, &aom_highbd_12_variance8x32_neon, 12), VarianceParams(4, 2, &aom_highbd_12_variance16x4_neon, 12), VarianceParams(2, 4, &aom_highbd_12_variance4x16_neon, 12), VarianceParams(6, 4, &aom_highbd_10_variance64x16_neon, 10), VarianceParams(4, 6, &aom_highbd_10_variance16x64_neon, 10), VarianceParams(5, 3, &aom_highbd_10_variance32x8_neon, 10), VarianceParams(3, 5, &aom_highbd_10_variance8x32_neon, 10), VarianceParams(4, 2, &aom_highbd_10_variance16x4_neon, 10), VarianceParams(2, 4, &aom_highbd_10_variance4x16_neon, 10), VarianceParams(6, 4, &aom_highbd_8_variance64x16_neon, 8), VarianceParams(4, 6, &aom_highbd_8_variance16x64_neon, 8), VarianceParams(5, 3, &aom_highbd_8_variance32x8_neon, 8), VarianceParams(3, 5, &aom_highbd_8_variance8x32_neon, 8), VarianceParams(4, 2, &aom_highbd_8_variance16x4_neon, 8), VarianceParams(2, 4, &aom_highbd_8_variance4x16_neon, 8), #endif }; INSTANTIATE_TEST_SUITE_P(NEON, AvxHBDVarianceTest, ::testing::ValuesIn(kArrayHBDVariance_neon)); const SubpelVarianceParams kArrayHBDSubpelVariance_neon[] = { SubpelVarianceParams(6, 6, &aom_highbd_12_sub_pixel_variance64x64_neon, 12), SubpelVarianceParams(6, 5, &aom_highbd_12_sub_pixel_variance64x32_neon, 12), SubpelVarianceParams(5, 6, &aom_highbd_12_sub_pixel_variance32x64_neon, 12), SubpelVarianceParams(5, 5, &aom_highbd_12_sub_pixel_variance32x32_neon, 12), SubpelVarianceParams(5, 4, &aom_highbd_12_sub_pixel_variance32x16_neon, 12), SubpelVarianceParams(4, 5, &aom_highbd_12_sub_pixel_variance16x32_neon, 12), SubpelVarianceParams(4, 4, &aom_highbd_12_sub_pixel_variance16x16_neon, 12), SubpelVarianceParams(4, 3, &aom_highbd_12_sub_pixel_variance16x8_neon, 12), 
SubpelVarianceParams(3, 4, &aom_highbd_12_sub_pixel_variance8x16_neon, 12), SubpelVarianceParams(3, 3, &aom_highbd_12_sub_pixel_variance8x8_neon, 12), SubpelVarianceParams(3, 2, &aom_highbd_12_sub_pixel_variance8x4_neon, 12), SubpelVarianceParams(2, 3, &aom_highbd_12_sub_pixel_variance4x8_neon, 12), SubpelVarianceParams(2, 2, &aom_highbd_12_sub_pixel_variance4x4_neon, 12), SubpelVarianceParams(6, 6, &aom_highbd_10_sub_pixel_variance64x64_neon, 10), SubpelVarianceParams(6, 5, &aom_highbd_10_sub_pixel_variance64x32_neon, 10), SubpelVarianceParams(5, 6, &aom_highbd_10_sub_pixel_variance32x64_neon, 10), SubpelVarianceParams(5, 5, &aom_highbd_10_sub_pixel_variance32x32_neon, 10), SubpelVarianceParams(5, 4, &aom_highbd_10_sub_pixel_variance32x16_neon, 10), SubpelVarianceParams(4, 5, &aom_highbd_10_sub_pixel_variance16x32_neon, 10), SubpelVarianceParams(4, 4, &aom_highbd_10_sub_pixel_variance16x16_neon, 10), SubpelVarianceParams(4, 3, &aom_highbd_10_sub_pixel_variance16x8_neon, 10), SubpelVarianceParams(3, 4, &aom_highbd_10_sub_pixel_variance8x16_neon, 10), SubpelVarianceParams(3, 3, &aom_highbd_10_sub_pixel_variance8x8_neon, 10), SubpelVarianceParams(3, 2, &aom_highbd_10_sub_pixel_variance8x4_neon, 10), SubpelVarianceParams(2, 3, &aom_highbd_10_sub_pixel_variance4x8_neon, 10), SubpelVarianceParams(2, 2, &aom_highbd_10_sub_pixel_variance4x4_neon, 10), SubpelVarianceParams(6, 6, &aom_highbd_8_sub_pixel_variance64x64_neon, 8), SubpelVarianceParams(6, 5, &aom_highbd_8_sub_pixel_variance64x32_neon, 8), SubpelVarianceParams(5, 6, &aom_highbd_8_sub_pixel_variance32x64_neon, 8), SubpelVarianceParams(5, 5, &aom_highbd_8_sub_pixel_variance32x32_neon, 8), SubpelVarianceParams(5, 4, &aom_highbd_8_sub_pixel_variance32x16_neon, 8), SubpelVarianceParams(4, 5, &aom_highbd_8_sub_pixel_variance16x32_neon, 8), SubpelVarianceParams(4, 4, &aom_highbd_8_sub_pixel_variance16x16_neon, 8), SubpelVarianceParams(4, 3, &aom_highbd_8_sub_pixel_variance16x8_neon, 8), SubpelVarianceParams(3, 4, 
&aom_highbd_8_sub_pixel_variance8x16_neon, 8), SubpelVarianceParams(3, 3, &aom_highbd_8_sub_pixel_variance8x8_neon, 8), SubpelVarianceParams(3, 2, &aom_highbd_8_sub_pixel_variance8x4_neon, 8), SubpelVarianceParams(2, 3, &aom_highbd_8_sub_pixel_variance4x8_neon, 8), SubpelVarianceParams(2, 2, &aom_highbd_8_sub_pixel_variance4x4_neon, 8), #if !CONFIG_REALTIME_ONLY SubpelVarianceParams(6, 4, &aom_highbd_8_sub_pixel_variance64x16_neon, 8), SubpelVarianceParams(4, 6, &aom_highbd_8_sub_pixel_variance16x64_neon, 8), SubpelVarianceParams(5, 3, &aom_highbd_8_sub_pixel_variance32x8_neon, 8), SubpelVarianceParams(3, 5, &aom_highbd_8_sub_pixel_variance8x32_neon, 8), SubpelVarianceParams(4, 2, &aom_highbd_8_sub_pixel_variance16x4_neon, 8), SubpelVarianceParams(2, 4, &aom_highbd_8_sub_pixel_variance4x16_neon, 8), SubpelVarianceParams(6, 4, &aom_highbd_10_sub_pixel_variance64x16_neon, 10), SubpelVarianceParams(4, 6, &aom_highbd_10_sub_pixel_variance16x64_neon, 10), SubpelVarianceParams(5, 3, &aom_highbd_10_sub_pixel_variance32x8_neon, 10), SubpelVarianceParams(3, 5, &aom_highbd_10_sub_pixel_variance8x32_neon, 10), SubpelVarianceParams(4, 2, &aom_highbd_10_sub_pixel_variance16x4_neon, 10), SubpelVarianceParams(2, 4, &aom_highbd_10_sub_pixel_variance4x16_neon, 10), SubpelVarianceParams(6, 4, &aom_highbd_12_sub_pixel_variance64x16_neon, 12), SubpelVarianceParams(4, 6, &aom_highbd_12_sub_pixel_variance16x64_neon, 12), SubpelVarianceParams(5, 3, &aom_highbd_12_sub_pixel_variance32x8_neon, 12), SubpelVarianceParams(3, 5, &aom_highbd_12_sub_pixel_variance8x32_neon, 12), SubpelVarianceParams(4, 2, &aom_highbd_12_sub_pixel_variance16x4_neon, 12), SubpelVarianceParams(2, 4, &aom_highbd_12_sub_pixel_variance4x16_neon, 12), #endif //! 
CONFIG_REALTIME_ONLY }; INSTANTIATE_TEST_SUITE_P(NEON, AvxHBDSubpelVarianceTest, ::testing::ValuesIn(kArrayHBDSubpelVariance_neon)); const SubpelAvgVarianceParams kArrayHBDSubpelAvgVariance_neon[] = { SubpelAvgVarianceParams(7, 7, &aom_highbd_8_sub_pixel_avg_variance128x128_neon, 8), SubpelAvgVarianceParams(7, 6, &aom_highbd_8_sub_pixel_avg_variance128x64_neon, 8), SubpelAvgVarianceParams(6, 7, &aom_highbd_8_sub_pixel_avg_variance64x128_neon, 8), SubpelAvgVarianceParams(6, 6, &aom_highbd_8_sub_pixel_avg_variance64x64_neon, 8), SubpelAvgVarianceParams(6, 5, &aom_highbd_8_sub_pixel_avg_variance64x32_neon, 8), SubpelAvgVarianceParams(5, 6, &aom_highbd_8_sub_pixel_avg_variance32x64_neon, 8), SubpelAvgVarianceParams(5, 5, &aom_highbd_8_sub_pixel_avg_variance32x32_neon, 8), SubpelAvgVarianceParams(5, 4, &aom_highbd_8_sub_pixel_avg_variance32x16_neon, 8), SubpelAvgVarianceParams(4, 5, &aom_highbd_8_sub_pixel_avg_variance16x32_neon, 8), SubpelAvgVarianceParams(4, 4, &aom_highbd_8_sub_pixel_avg_variance16x16_neon, 8), SubpelAvgVarianceParams(4, 3, &aom_highbd_8_sub_pixel_avg_variance16x8_neon, 8), SubpelAvgVarianceParams(3, 4, &aom_highbd_8_sub_pixel_avg_variance8x16_neon, 8), SubpelAvgVarianceParams(3, 3, &aom_highbd_8_sub_pixel_avg_variance8x8_neon, 8), SubpelAvgVarianceParams(3, 2, &aom_highbd_8_sub_pixel_avg_variance8x4_neon, 8), SubpelAvgVarianceParams(2, 3, &aom_highbd_8_sub_pixel_avg_variance4x8_neon, 8), SubpelAvgVarianceParams(2, 2, &aom_highbd_8_sub_pixel_avg_variance4x4_neon, 8), SubpelAvgVarianceParams( 7, 7, &aom_highbd_10_sub_pixel_avg_variance128x128_neon, 10), SubpelAvgVarianceParams(7, 6, &aom_highbd_10_sub_pixel_avg_variance128x64_neon, 10), SubpelAvgVarianceParams(6, 7, &aom_highbd_10_sub_pixel_avg_variance64x128_neon, 10), SubpelAvgVarianceParams(6, 6, &aom_highbd_10_sub_pixel_avg_variance64x64_neon, 10), SubpelAvgVarianceParams(6, 5, &aom_highbd_10_sub_pixel_avg_variance64x32_neon, 10), SubpelAvgVarianceParams(5, 6, 
&aom_highbd_10_sub_pixel_avg_variance32x64_neon, 10), SubpelAvgVarianceParams(5, 5, &aom_highbd_10_sub_pixel_avg_variance32x32_neon, 10), SubpelAvgVarianceParams(5, 4, &aom_highbd_10_sub_pixel_avg_variance32x16_neon, 10), SubpelAvgVarianceParams(4, 5, &aom_highbd_10_sub_pixel_avg_variance16x32_neon, 10), SubpelAvgVarianceParams(4, 4, &aom_highbd_10_sub_pixel_avg_variance16x16_neon, 10), SubpelAvgVarianceParams(4, 3, &aom_highbd_10_sub_pixel_avg_variance16x8_neon, 10), SubpelAvgVarianceParams(3, 4, &aom_highbd_10_sub_pixel_avg_variance8x16_neon, 10), SubpelAvgVarianceParams(3, 3, &aom_highbd_10_sub_pixel_avg_variance8x8_neon, 10), SubpelAvgVarianceParams(3, 2, &aom_highbd_10_sub_pixel_avg_variance8x4_neon, 10), SubpelAvgVarianceParams(2, 3, &aom_highbd_10_sub_pixel_avg_variance4x8_neon, 10), SubpelAvgVarianceParams(2, 2, &aom_highbd_10_sub_pixel_avg_variance4x4_neon, 10), SubpelAvgVarianceParams( 7, 7, &aom_highbd_12_sub_pixel_avg_variance128x128_neon, 12), SubpelAvgVarianceParams(7, 6, &aom_highbd_12_sub_pixel_avg_variance128x64_neon, 12), SubpelAvgVarianceParams(6, 7, &aom_highbd_12_sub_pixel_avg_variance64x128_neon, 12), SubpelAvgVarianceParams(6, 6, &aom_highbd_12_sub_pixel_avg_variance64x64_neon, 12), SubpelAvgVarianceParams(6, 5, &aom_highbd_12_sub_pixel_avg_variance64x32_neon, 12), SubpelAvgVarianceParams(5, 6, &aom_highbd_12_sub_pixel_avg_variance32x64_neon, 12), SubpelAvgVarianceParams(5, 5, &aom_highbd_12_sub_pixel_avg_variance32x32_neon, 12), SubpelAvgVarianceParams(5, 4, &aom_highbd_12_sub_pixel_avg_variance32x16_neon, 12), SubpelAvgVarianceParams(4, 5, &aom_highbd_12_sub_pixel_avg_variance16x32_neon, 12), SubpelAvgVarianceParams(4, 4, &aom_highbd_12_sub_pixel_avg_variance16x16_neon, 12), SubpelAvgVarianceParams(4, 3, &aom_highbd_12_sub_pixel_avg_variance16x8_neon, 12), SubpelAvgVarianceParams(3, 4, &aom_highbd_12_sub_pixel_avg_variance8x16_neon, 12), SubpelAvgVarianceParams(3, 3, &aom_highbd_12_sub_pixel_avg_variance8x8_neon, 12), 
SubpelAvgVarianceParams(3, 2, &aom_highbd_12_sub_pixel_avg_variance8x4_neon, 12), SubpelAvgVarianceParams(2, 3, &aom_highbd_12_sub_pixel_avg_variance4x8_neon, 12), SubpelAvgVarianceParams(2, 2, &aom_highbd_12_sub_pixel_avg_variance4x4_neon, 12), #if !CONFIG_REALTIME_ONLY SubpelAvgVarianceParams(6, 4, &aom_highbd_8_sub_pixel_avg_variance64x16_neon, 8), SubpelAvgVarianceParams(4, 6, &aom_highbd_8_sub_pixel_avg_variance16x64_neon, 8), SubpelAvgVarianceParams(5, 3, &aom_highbd_8_sub_pixel_avg_variance32x8_neon, 8), SubpelAvgVarianceParams(3, 5, &aom_highbd_8_sub_pixel_avg_variance8x32_neon, 8), SubpelAvgVarianceParams(4, 2, &aom_highbd_8_sub_pixel_avg_variance16x4_neon, 8), SubpelAvgVarianceParams(2, 4, &aom_highbd_8_sub_pixel_avg_variance4x16_neon, 8), SubpelAvgVarianceParams(6, 4, &aom_highbd_10_sub_pixel_avg_variance64x16_neon, 10), SubpelAvgVarianceParams(4, 6, &aom_highbd_10_sub_pixel_avg_variance16x64_neon, 10), SubpelAvgVarianceParams(5, 3, &aom_highbd_10_sub_pixel_avg_variance32x8_neon, 10), SubpelAvgVarianceParams(3, 5, &aom_highbd_10_sub_pixel_avg_variance8x32_neon, 10), SubpelAvgVarianceParams(4, 2, &aom_highbd_10_sub_pixel_avg_variance16x4_neon, 10), SubpelAvgVarianceParams(2, 4, &aom_highbd_10_sub_pixel_avg_variance4x16_neon, 10), SubpelAvgVarianceParams(6, 4, &aom_highbd_12_sub_pixel_avg_variance64x16_neon, 12), SubpelAvgVarianceParams(4, 6, &aom_highbd_12_sub_pixel_avg_variance16x64_neon, 12), SubpelAvgVarianceParams(5, 3, &aom_highbd_12_sub_pixel_avg_variance32x8_neon, 12), SubpelAvgVarianceParams(3, 5, &aom_highbd_12_sub_pixel_avg_variance8x32_neon, 12), SubpelAvgVarianceParams(4, 2, &aom_highbd_12_sub_pixel_avg_variance16x4_neon, 12), SubpelAvgVarianceParams(2, 4, &aom_highbd_12_sub_pixel_avg_variance4x16_neon, 12), #endif }; INSTANTIATE_TEST_SUITE_P(NEON, AvxHBDSubpelAvgVarianceTest, ::testing::ValuesIn(kArrayHBDSubpelAvgVariance_neon)); const DistWtdSubpelAvgVarianceParams kArrayHBDDistWtdSubpelAvgVariance_neon[] = { DistWtdSubpelAvgVarianceParams( 
7, 7, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance128x128_neon, 8), DistWtdSubpelAvgVarianceParams( 7, 6, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance128x64_neon, 8), DistWtdSubpelAvgVarianceParams( 6, 7, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance64x128_neon, 8), DistWtdSubpelAvgVarianceParams( 6, 6, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance64x64_neon, 8), DistWtdSubpelAvgVarianceParams( 6, 5, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance64x32_neon, 8), DistWtdSubpelAvgVarianceParams( 5, 6, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance32x64_neon, 8), DistWtdSubpelAvgVarianceParams( 5, 5, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance32x32_neon, 8), DistWtdSubpelAvgVarianceParams( 5, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance32x16_neon, 8), DistWtdSubpelAvgVarianceParams( 4, 5, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x32_neon, 8), DistWtdSubpelAvgVarianceParams( 4, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x16_neon, 8), DistWtdSubpelAvgVarianceParams( 4, 3, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x8_neon, 8), DistWtdSubpelAvgVarianceParams( 3, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance8x16_neon, 8), DistWtdSubpelAvgVarianceParams( 3, 3, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance8x8_neon, 8), DistWtdSubpelAvgVarianceParams( 3, 2, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance8x4_neon, 8), DistWtdSubpelAvgVarianceParams( 2, 3, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance4x8_neon, 8), DistWtdSubpelAvgVarianceParams( 2, 2, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance4x4_neon, 8), DistWtdSubpelAvgVarianceParams( 7, 7, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance128x128_neon, 10), DistWtdSubpelAvgVarianceParams( 7, 6, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance128x64_neon, 10), DistWtdSubpelAvgVarianceParams( 6, 7, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance64x128_neon, 10), DistWtdSubpelAvgVarianceParams( 6, 6, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance64x64_neon, 10), DistWtdSubpelAvgVarianceParams( 6, 5, 
&aom_highbd_10_dist_wtd_sub_pixel_avg_variance64x32_neon, 10), DistWtdSubpelAvgVarianceParams( 5, 6, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance32x64_neon, 10), DistWtdSubpelAvgVarianceParams( 5, 5, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance32x32_neon, 10), DistWtdSubpelAvgVarianceParams( 5, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance32x16_neon, 10), DistWtdSubpelAvgVarianceParams( 4, 5, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x32_neon, 10), DistWtdSubpelAvgVarianceParams( 4, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x16_neon, 10), DistWtdSubpelAvgVarianceParams( 4, 3, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x8_neon, 10), DistWtdSubpelAvgVarianceParams( 3, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance8x16_neon, 10), DistWtdSubpelAvgVarianceParams( 3, 3, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance8x8_neon, 10), DistWtdSubpelAvgVarianceParams( 3, 2, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance8x4_neon, 10), DistWtdSubpelAvgVarianceParams( 2, 3, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance4x8_neon, 10), DistWtdSubpelAvgVarianceParams( 2, 2, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance4x4_neon, 10), DistWtdSubpelAvgVarianceParams( 7, 7, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance128x128_neon, 12), DistWtdSubpelAvgVarianceParams( 7, 6, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance128x64_neon, 12), DistWtdSubpelAvgVarianceParams( 6, 7, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance64x128_neon, 12), DistWtdSubpelAvgVarianceParams( 6, 6, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance64x64_neon, 12), DistWtdSubpelAvgVarianceParams( 6, 5, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance64x32_neon, 12), DistWtdSubpelAvgVarianceParams( 5, 6, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance32x64_neon, 12), DistWtdSubpelAvgVarianceParams( 5, 5, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance32x32_neon, 12), DistWtdSubpelAvgVarianceParams( 5, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance32x16_neon, 12), 
DistWtdSubpelAvgVarianceParams( 4, 5, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x32_neon, 12), DistWtdSubpelAvgVarianceParams( 4, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x16_neon, 12), DistWtdSubpelAvgVarianceParams( 4, 3, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x8_neon, 12), DistWtdSubpelAvgVarianceParams( 3, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance8x16_neon, 12), DistWtdSubpelAvgVarianceParams( 3, 3, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance8x8_neon, 12), DistWtdSubpelAvgVarianceParams( 3, 2, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance8x4_neon, 12), DistWtdSubpelAvgVarianceParams( 2, 3, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance4x8_neon, 12), DistWtdSubpelAvgVarianceParams( 2, 2, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance4x4_neon, 12), #if !CONFIG_REALTIME_ONLY DistWtdSubpelAvgVarianceParams( 6, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance64x16_neon, 8), DistWtdSubpelAvgVarianceParams( 4, 6, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x64_neon, 8), DistWtdSubpelAvgVarianceParams( 5, 3, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance32x8_neon, 8), DistWtdSubpelAvgVarianceParams( 3, 5, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance8x32_neon, 8), DistWtdSubpelAvgVarianceParams( 4, 2, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x4_neon, 8), DistWtdSubpelAvgVarianceParams( 2, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance4x16_neon, 8), DistWtdSubpelAvgVarianceParams( 6, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance64x16_neon, 10), DistWtdSubpelAvgVarianceParams( 4, 6, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x64_neon, 10), DistWtdSubpelAvgVarianceParams( 5, 3, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance32x8_neon, 10), DistWtdSubpelAvgVarianceParams( 3, 5, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance8x32_neon, 10), DistWtdSubpelAvgVarianceParams( 4, 2, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x4_neon, 10), DistWtdSubpelAvgVarianceParams( 2, 4, 
&aom_highbd_10_dist_wtd_sub_pixel_avg_variance4x16_neon, 10), DistWtdSubpelAvgVarianceParams( 6, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance64x16_neon, 12), DistWtdSubpelAvgVarianceParams( 4, 6, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x64_neon, 12), DistWtdSubpelAvgVarianceParams( 5, 3, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance32x8_neon, 12), DistWtdSubpelAvgVarianceParams( 3, 5, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance8x32_neon, 12), DistWtdSubpelAvgVarianceParams( 4, 2, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x4_neon, 12), DistWtdSubpelAvgVarianceParams( 2, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance4x16_neon, 12), #endif // !CONFIG_REALTIME_ONLY }; INSTANTIATE_TEST_SUITE_P( NEON, AvxHBDDistWtdSubpelAvgVarianceTest, ::testing::ValuesIn(kArrayHBDDistWtdSubpelAvgVariance_neon)); #if !CONFIG_REALTIME_ONLY const ObmcSubpelVarianceParams kArrayHBDObmcSubpelVariance_neon[] = { ObmcSubpelVarianceParams( 7, 7, &aom_highbd_12_obmc_sub_pixel_variance128x128_neon, 12), ObmcSubpelVarianceParams( 7, 6, &aom_highbd_12_obmc_sub_pixel_variance128x64_neon, 12), ObmcSubpelVarianceParams( 6, 7, &aom_highbd_12_obmc_sub_pixel_variance64x128_neon, 12), ObmcSubpelVarianceParams( 6, 6, &aom_highbd_12_obmc_sub_pixel_variance64x64_neon, 12), ObmcSubpelVarianceParams( 6, 5, &aom_highbd_12_obmc_sub_pixel_variance64x32_neon, 12), ObmcSubpelVarianceParams( 5, 6, &aom_highbd_12_obmc_sub_pixel_variance32x64_neon, 12), ObmcSubpelVarianceParams( 5, 5, &aom_highbd_12_obmc_sub_pixel_variance32x32_neon, 12), ObmcSubpelVarianceParams( 5, 4, &aom_highbd_12_obmc_sub_pixel_variance32x16_neon, 12), ObmcSubpelVarianceParams( 4, 5, &aom_highbd_12_obmc_sub_pixel_variance16x32_neon, 12), ObmcSubpelVarianceParams( 4, 4, &aom_highbd_12_obmc_sub_pixel_variance16x16_neon, 12), ObmcSubpelVarianceParams(4, 3, &aom_highbd_12_obmc_sub_pixel_variance16x8_neon, 12), ObmcSubpelVarianceParams(3, 4, &aom_highbd_12_obmc_sub_pixel_variance8x16_neon, 12), ObmcSubpelVarianceParams(3, 3, 
&aom_highbd_12_obmc_sub_pixel_variance8x8_neon, 12), ObmcSubpelVarianceParams(3, 2, &aom_highbd_12_obmc_sub_pixel_variance8x4_neon, 12), ObmcSubpelVarianceParams(2, 3, &aom_highbd_12_obmc_sub_pixel_variance4x8_neon, 12), ObmcSubpelVarianceParams(2, 2, &aom_highbd_12_obmc_sub_pixel_variance4x4_neon, 12), ObmcSubpelVarianceParams( 6, 4, &aom_highbd_12_obmc_sub_pixel_variance64x16_neon, 12), ObmcSubpelVarianceParams( 4, 6, &aom_highbd_12_obmc_sub_pixel_variance16x64_neon, 12), ObmcSubpelVarianceParams(5, 3, &aom_highbd_12_obmc_sub_pixel_variance32x8_neon, 12), ObmcSubpelVarianceParams(3, 5, &aom_highbd_12_obmc_sub_pixel_variance8x32_neon, 12), ObmcSubpelVarianceParams(4, 2, &aom_highbd_12_obmc_sub_pixel_variance16x4_neon, 12), ObmcSubpelVarianceParams(2, 4, &aom_highbd_12_obmc_sub_pixel_variance4x16_neon, 12), ObmcSubpelVarianceParams( 7, 7, &aom_highbd_10_obmc_sub_pixel_variance128x128_neon, 10), ObmcSubpelVarianceParams( 7, 6, &aom_highbd_10_obmc_sub_pixel_variance128x64_neon, 10), ObmcSubpelVarianceParams( 6, 7, &aom_highbd_10_obmc_sub_pixel_variance64x128_neon, 10), ObmcSubpelVarianceParams( 6, 6, &aom_highbd_10_obmc_sub_pixel_variance64x64_neon, 10), ObmcSubpelVarianceParams( 6, 5, &aom_highbd_10_obmc_sub_pixel_variance64x32_neon, 10), ObmcSubpelVarianceParams( 5, 6, &aom_highbd_10_obmc_sub_pixel_variance32x64_neon, 10), ObmcSubpelVarianceParams( 5, 5, &aom_highbd_10_obmc_sub_pixel_variance32x32_neon, 10), ObmcSubpelVarianceParams( 5, 4, &aom_highbd_10_obmc_sub_pixel_variance32x16_neon, 10), ObmcSubpelVarianceParams( 4, 5, &aom_highbd_10_obmc_sub_pixel_variance16x32_neon, 10), ObmcSubpelVarianceParams( 4, 4, &aom_highbd_10_obmc_sub_pixel_variance16x16_neon, 10), ObmcSubpelVarianceParams(4, 3, &aom_highbd_10_obmc_sub_pixel_variance16x8_neon, 10), ObmcSubpelVarianceParams(3, 4, &aom_highbd_10_obmc_sub_pixel_variance8x16_neon, 10), ObmcSubpelVarianceParams(3, 3, &aom_highbd_10_obmc_sub_pixel_variance8x8_neon, 10), ObmcSubpelVarianceParams(3, 2, 
&aom_highbd_10_obmc_sub_pixel_variance8x4_neon, 10), ObmcSubpelVarianceParams(2, 3, &aom_highbd_10_obmc_sub_pixel_variance4x8_neon, 10), ObmcSubpelVarianceParams(2, 2, &aom_highbd_10_obmc_sub_pixel_variance4x4_neon, 10), ObmcSubpelVarianceParams( 6, 4, &aom_highbd_10_obmc_sub_pixel_variance64x16_neon, 10), ObmcSubpelVarianceParams( 4, 6, &aom_highbd_10_obmc_sub_pixel_variance16x64_neon, 10), ObmcSubpelVarianceParams(5, 3, &aom_highbd_10_obmc_sub_pixel_variance32x8_neon, 10), ObmcSubpelVarianceParams(3, 5, &aom_highbd_10_obmc_sub_pixel_variance8x32_neon, 10), ObmcSubpelVarianceParams(4, 2, &aom_highbd_10_obmc_sub_pixel_variance16x4_neon, 10), ObmcSubpelVarianceParams(2, 4, &aom_highbd_10_obmc_sub_pixel_variance4x16_neon, 10), ObmcSubpelVarianceParams( 7, 7, &aom_highbd_8_obmc_sub_pixel_variance128x128_neon, 8), ObmcSubpelVarianceParams(7, 6, &aom_highbd_8_obmc_sub_pixel_variance128x64_neon, 8), ObmcSubpelVarianceParams(6, 7, &aom_highbd_8_obmc_sub_pixel_variance64x128_neon, 8), ObmcSubpelVarianceParams(6, 6, &aom_highbd_8_obmc_sub_pixel_variance64x64_neon, 8), ObmcSubpelVarianceParams(6, 5, &aom_highbd_8_obmc_sub_pixel_variance64x32_neon, 8), ObmcSubpelVarianceParams(5, 6, &aom_highbd_8_obmc_sub_pixel_variance32x64_neon, 8), ObmcSubpelVarianceParams(5, 5, &aom_highbd_8_obmc_sub_pixel_variance32x32_neon, 8), ObmcSubpelVarianceParams(5, 4, &aom_highbd_8_obmc_sub_pixel_variance32x16_neon, 8), ObmcSubpelVarianceParams(4, 5, &aom_highbd_8_obmc_sub_pixel_variance16x32_neon, 8), ObmcSubpelVarianceParams(4, 4, &aom_highbd_8_obmc_sub_pixel_variance16x16_neon, 8), ObmcSubpelVarianceParams(4, 3, &aom_highbd_8_obmc_sub_pixel_variance16x8_neon, 8), ObmcSubpelVarianceParams(3, 4, &aom_highbd_8_obmc_sub_pixel_variance8x16_neon, 8), ObmcSubpelVarianceParams(3, 3, &aom_highbd_8_obmc_sub_pixel_variance8x8_neon, 8), ObmcSubpelVarianceParams(3, 2, &aom_highbd_8_obmc_sub_pixel_variance8x4_neon, 8), ObmcSubpelVarianceParams(2, 3, &aom_highbd_8_obmc_sub_pixel_variance4x8_neon, 8), 
ObmcSubpelVarianceParams(2, 2, &aom_highbd_8_obmc_sub_pixel_variance4x4_neon, 8), ObmcSubpelVarianceParams(6, 4, &aom_highbd_8_obmc_sub_pixel_variance64x16_neon, 8), ObmcSubpelVarianceParams(4, 6, &aom_highbd_8_obmc_sub_pixel_variance16x64_neon, 8), ObmcSubpelVarianceParams(5, 3, &aom_highbd_8_obmc_sub_pixel_variance32x8_neon, 8), ObmcSubpelVarianceParams(3, 5, &aom_highbd_8_obmc_sub_pixel_variance8x32_neon, 8), ObmcSubpelVarianceParams(4, 2, &aom_highbd_8_obmc_sub_pixel_variance16x4_neon, 8), ObmcSubpelVarianceParams(2, 4, &aom_highbd_8_obmc_sub_pixel_variance4x16_neon, 8), }; INSTANTIATE_TEST_SUITE_P(NEON, AvxHBDObmcSubpelVarianceTest, ::testing::ValuesIn(kArrayHBDObmcSubpelVariance_neon)); #endif // !CONFIG_REALTIME_ONLY #endif // CONFIG_AV1_HIGHBITDEPTH #endif // HAVE_NEON #if HAVE_NEON_DOTPROD const VarianceParams kArrayVariance_neon_dotprod[] = { VarianceParams(7, 7, &aom_variance128x128_neon_dotprod), VarianceParams(6, 6, &aom_variance64x64_neon_dotprod), VarianceParams(7, 6, &aom_variance128x64_neon_dotprod), VarianceParams(6, 7, &aom_variance64x128_neon_dotprod), VarianceParams(6, 6, &aom_variance64x64_neon_dotprod), VarianceParams(6, 5, &aom_variance64x32_neon_dotprod), VarianceParams(5, 6, &aom_variance32x64_neon_dotprod), VarianceParams(5, 5, &aom_variance32x32_neon_dotprod), VarianceParams(5, 4, &aom_variance32x16_neon_dotprod), VarianceParams(4, 5, &aom_variance16x32_neon_dotprod), VarianceParams(4, 4, &aom_variance16x16_neon_dotprod), VarianceParams(4, 3, &aom_variance16x8_neon_dotprod), VarianceParams(3, 4, &aom_variance8x16_neon_dotprod), VarianceParams(3, 3, &aom_variance8x8_neon_dotprod), VarianceParams(3, 2, &aom_variance8x4_neon_dotprod), VarianceParams(2, 3, &aom_variance4x8_neon_dotprod), VarianceParams(2, 2, &aom_variance4x4_neon_dotprod), #if !CONFIG_REALTIME_ONLY VarianceParams(2, 4, &aom_variance4x16_neon_dotprod), VarianceParams(4, 2, &aom_variance16x4_neon_dotprod), VarianceParams(3, 5, &aom_variance8x32_neon_dotprod), VarianceParams(5, 
3, &aom_variance32x8_neon_dotprod), VarianceParams(4, 6, &aom_variance16x64_neon_dotprod), VarianceParams(6, 4, &aom_variance64x16_neon_dotprod), #endif }; INSTANTIATE_TEST_SUITE_P(NEON_DOTPROD, AvxVarianceTest, ::testing::ValuesIn(kArrayVariance_neon_dotprod)); const GetSseSumParams kArrayGetSseSum8x8Quad_neon_dotprod[] = { GetSseSumParams(7, 7, &aom_get_var_sse_sum_8x8_quad_neon_dotprod, 0), GetSseSumParams(6, 6, &aom_get_var_sse_sum_8x8_quad_neon_dotprod, 0), GetSseSumParams(5, 5, &aom_get_var_sse_sum_8x8_quad_neon_dotprod, 0), GetSseSumParams(5, 4, &aom_get_var_sse_sum_8x8_quad_neon_dotprod, 0) }; INSTANTIATE_TEST_SUITE_P( NEON_DOTPROD, GetSseSum8x8QuadTest, ::testing::ValuesIn(kArrayGetSseSum8x8Quad_neon_dotprod)); const GetSseSumParamsDual kArrayGetSseSum16x16Dual_neon_dotprod[] = { GetSseSumParamsDual(7, 7, &aom_get_var_sse_sum_16x16_dual_neon_dotprod, 0), GetSseSumParamsDual(6, 6, &aom_get_var_sse_sum_16x16_dual_neon_dotprod, 0), GetSseSumParamsDual(5, 5, &aom_get_var_sse_sum_16x16_dual_neon_dotprod, 0), GetSseSumParamsDual(5, 4, &aom_get_var_sse_sum_16x16_dual_neon_dotprod, 0) }; INSTANTIATE_TEST_SUITE_P( NEON_DOTPROD, GetSseSum16x16DualTest, ::testing::ValuesIn(kArrayGetSseSum16x16Dual_neon_dotprod)); INSTANTIATE_TEST_SUITE_P( NEON_DOTPROD, AvxMseTest, ::testing::Values(MseParams(3, 3, &aom_mse8x8_neon_dotprod), MseParams(3, 4, &aom_mse8x16_neon_dotprod), MseParams(4, 4, &aom_mse16x16_neon_dotprod), MseParams(4, 3, &aom_mse16x8_neon_dotprod))); #endif // HAVE_NEON_DOTPROD #if HAVE_SVE #if CONFIG_AV1_HIGHBITDEPTH const VarianceParams kArrayHBDVariance_sve[] = { VarianceParams(7, 7, &aom_highbd_12_variance128x128_sve, 12), VarianceParams(7, 6, &aom_highbd_12_variance128x64_sve, 12), VarianceParams(6, 7, &aom_highbd_12_variance64x128_sve, 12), VarianceParams(6, 6, &aom_highbd_12_variance64x64_sve, 12), VarianceParams(6, 5, &aom_highbd_12_variance64x32_sve, 12), VarianceParams(5, 6, &aom_highbd_12_variance32x64_sve, 12), VarianceParams(5, 5, 
&aom_highbd_12_variance32x32_sve, 12), VarianceParams(5, 4, &aom_highbd_12_variance32x16_sve, 12), VarianceParams(4, 5, &aom_highbd_12_variance16x32_sve, 12), VarianceParams(4, 4, &aom_highbd_12_variance16x16_sve, 12), VarianceParams(4, 3, &aom_highbd_12_variance16x8_sve, 12), VarianceParams(3, 4, &aom_highbd_12_variance8x16_sve, 12), VarianceParams(3, 3, &aom_highbd_12_variance8x8_sve, 12), VarianceParams(3, 2, &aom_highbd_12_variance8x4_sve, 12), VarianceParams(2, 3, &aom_highbd_12_variance4x8_sve, 12), VarianceParams(2, 2, &aom_highbd_12_variance4x4_sve, 12), VarianceParams(7, 7, &aom_highbd_10_variance128x128_sve, 10), VarianceParams(7, 6, &aom_highbd_10_variance128x64_sve, 10), VarianceParams(6, 7, &aom_highbd_10_variance64x128_sve, 10), VarianceParams(6, 6, &aom_highbd_10_variance64x64_sve, 10), VarianceParams(6, 5, &aom_highbd_10_variance64x32_sve, 10), VarianceParams(5, 6, &aom_highbd_10_variance32x64_sve, 10), VarianceParams(5, 5, &aom_highbd_10_variance32x32_sve, 10), VarianceParams(5, 4, &aom_highbd_10_variance32x16_sve, 10), VarianceParams(4, 5, &aom_highbd_10_variance16x32_sve, 10), VarianceParams(4, 4, &aom_highbd_10_variance16x16_sve, 10), VarianceParams(4, 3, &aom_highbd_10_variance16x8_sve, 10), VarianceParams(3, 4, &aom_highbd_10_variance8x16_sve, 10), VarianceParams(3, 3, &aom_highbd_10_variance8x8_sve, 10), VarianceParams(3, 2, &aom_highbd_10_variance8x4_sve, 10), VarianceParams(2, 3, &aom_highbd_10_variance4x8_sve, 10), VarianceParams(2, 2, &aom_highbd_10_variance4x4_sve, 10), VarianceParams(7, 7, &aom_highbd_8_variance128x128_sve, 8), VarianceParams(7, 6, &aom_highbd_8_variance128x64_sve, 8), VarianceParams(6, 7, &aom_highbd_8_variance64x128_sve, 8), VarianceParams(6, 6, &aom_highbd_8_variance64x64_sve, 8), VarianceParams(6, 5, &aom_highbd_8_variance64x32_sve, 8), VarianceParams(5, 6, &aom_highbd_8_variance32x64_sve, 8), VarianceParams(5, 5, &aom_highbd_8_variance32x32_sve, 8), VarianceParams(5, 4, &aom_highbd_8_variance32x16_sve, 8), 
VarianceParams(4, 5, &aom_highbd_8_variance16x32_sve, 8), VarianceParams(4, 4, &aom_highbd_8_variance16x16_sve, 8), VarianceParams(4, 3, &aom_highbd_8_variance16x8_sve, 8), VarianceParams(3, 4, &aom_highbd_8_variance8x16_sve, 8), VarianceParams(3, 3, &aom_highbd_8_variance8x8_sve, 8), VarianceParams(3, 2, &aom_highbd_8_variance8x4_sve, 8), VarianceParams(2, 3, &aom_highbd_8_variance4x8_sve, 8), VarianceParams(2, 2, &aom_highbd_8_variance4x4_sve, 8), #if !CONFIG_REALTIME_ONLY VarianceParams(6, 4, &aom_highbd_12_variance64x16_sve, 12), VarianceParams(4, 6, &aom_highbd_12_variance16x64_sve, 12), VarianceParams(5, 3, &aom_highbd_12_variance32x8_sve, 12), VarianceParams(3, 5, &aom_highbd_12_variance8x32_sve, 12), VarianceParams(4, 2, &aom_highbd_12_variance16x4_sve, 12), VarianceParams(2, 4, &aom_highbd_12_variance4x16_sve, 12), VarianceParams(6, 4, &aom_highbd_10_variance64x16_sve, 10), VarianceParams(4, 6, &aom_highbd_10_variance16x64_sve, 10), VarianceParams(5, 3, &aom_highbd_10_variance32x8_sve, 10), VarianceParams(3, 5, &aom_highbd_10_variance8x32_sve, 10), VarianceParams(4, 2, &aom_highbd_10_variance16x4_sve, 10), VarianceParams(2, 4, &aom_highbd_10_variance4x16_sve, 10), VarianceParams(6, 4, &aom_highbd_8_variance64x16_sve, 8), VarianceParams(4, 6, &aom_highbd_8_variance16x64_sve, 8), VarianceParams(5, 3, &aom_highbd_8_variance32x8_sve, 8), VarianceParams(3, 5, &aom_highbd_8_variance8x32_sve, 8), VarianceParams(4, 2, &aom_highbd_8_variance16x4_sve, 8), VarianceParams(2, 4, &aom_highbd_8_variance4x16_sve, 8), #endif }; INSTANTIATE_TEST_SUITE_P(SVE, AvxHBDVarianceTest, ::testing::ValuesIn(kArrayHBDVariance_sve)); #endif // CONFIG_AV1_HIGHBITDEPTH #endif // HAVE_SVE } // namespace